.src_unaligned:
ADD_HFYU_LEFT_LOOP 0, 0
+
+%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
+; dst[i] = (dst[i] + src[i]) & mask, over w 16-bit elements.
+; mask is an 'unsigned' (32-bit) argument: movd requires a 32-bit GPR
+; source, and the upper half of maskq is undefined per the ABI.
+ movd m4, maskd
+ punpcklwd m4, m4
+ punpcklwd m4, m4
+ punpcklwd m4, m4 ; m4 = mask replicated into every word lane
+; w is an 'int' argument, so the upper 32 bits of wq are undefined on
+; x86-64; a 32-bit write zero-extends into the full register. Doubling
+; converts the element count into a byte count.
+ add wd, wd
+ test wq, 2*mmsize - 1
+ jz %%.tomainloop
+; scalar head: peel trailing words until the byte count is a multiple
+; of 2*mmsize, so the vector loop below can run in 2-register strides
+%%.wordloop:
+ sub wq, 2
+ mov ax, [srcq+wq]
+ add ax, [dstq+wq]
+ and ax, maskw
+ mov [dstq+wq], ax
+ test wq, 2*mmsize - 1
+ jnz %%.wordloop
+%%.tomainloop:
+; point srcq/dstq at the end of the remaining data and count wq up
+; from -bytes to 0, so one register serves as both index and counter
+ add srcq, wq
+ add dstq, wq
+ neg wq
+ jz %%.end
+%%.loop:
+%if %1
+ mova m0, [srcq+wq]
+ mova m1, [dstq+wq]
+ mova m2, [srcq+wq+mmsize]
+ mova m3, [dstq+wq+mmsize]
+%else
+ movu m0, [srcq+wq]
+ movu m1, [dstq+wq]
+ movu m2, [srcq+wq+mmsize]
+ movu m3, [dstq+wq+mmsize]
+%endif
+ paddw m0, m1
+ paddw m2, m3
+ pand m0, m4
+ pand m2, m4
+%if %1
+ mova [dstq+wq] , m0
+ mova [dstq+wq+mmsize], m2
+%else
+ movu [dstq+wq] , m0
+ movu [dstq+wq+mmsize], m2
+%endif
+ add wq, 2*mmsize
+ jl %%.loop
+%%.end:
+ RET
+%endmacro
+
+INIT_MMX mmx
+; void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
+; MMX mova assembles to movq, which has no alignment requirement, so the
+; "aligned" variant of the loop is always safe here.
+cglobal add_int16, 4,4,5, dst, src, mask, w
+ ADD_INT16_LOOP 1
+
+INIT_XMM sse2
+; void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
+; Dispatch on pointer alignment: mova (movdqa) faults on unaligned addresses,
+; so fall back to the movu variant unless both src and dst are 16-byte
+; aligned. Each ADD_INT16_LOOP expansion ends in RET, so the aligned path
+; does not fall through into .unaligned.
+cglobal add_int16, 4,4,5, dst, src, mask, w
+ test srcq, mmsize-1
+ jnz .unaligned
+ test dstq, mmsize-1
+ jnz .unaligned
+ ADD_INT16_LOOP 1
+.unaligned:
+ ADD_INT16_LOOP 0
+
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)
#endif /* HAVE_MMX_INLINE */
#if HAVE_MMX_EXTERNAL
+ c->add_int16 = ff_add_int16_mmx;
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
#endif /* HAVE_MMX_EXTERNAL */
}
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
}
c->bswap_buf = ff_bswap32_buf_sse2;
+
+ c->add_int16 = ff_add_int16_sse2;
#endif /* HAVE_SSE2_EXTERNAL */
}
void ff_clear_blocks_sse(int16_t *blocks);
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
+void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,