-; int ff_add_hfyu_left_pred(uint8_t *dst, const uint8_t *src, int w, int left)
-INIT_MMX ssse3 ; x86inc: build the following function for SSSE3 using 64-bit MMX registers (mmsize = 8)
-cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left ; 3 args loaded into registers, 3 GPRs used, 7 vector regs declared; 'left' is only named, so it is read through leftm
-.skip_prologue: ; label right after the prologue; NOTE(review): presumably a jump target for another variant - no user is visible in this chunk
- mova m5, [pb_7] ; constant defined elsewhere; NOTE(review): name suggests all bytes = 7 (a byte-shuffle index for the top byte) - confirm
- mova m4, [pb_zzzz3333zzzzbbbb] ; constant defined elsewhere, consumed by the loop macro; contents not visible here
- mova m3, [pb_zz11zz55zz99zzdd] ; constant defined elsewhere, consumed by the loop macro; contents not visible here
- movd m0, leftm ; m0 = 'left' argument in the low dword, remaining bits zeroed
- psllq m0, 56 ; only the low byte of 'left' survives, moved into the top byte (byte 7) of m0
- ADD_HFYU_LEFT_LOOP 1, 1 ; loop macro defined elsewhere, both flags set; NOTE(review): no ret follows, so the macro presumably ends the function - confirm
-
-INIT_XMM sse4 ; x86inc: build the following function for SSE4 using 128-bit XMM registers (mmsize = 16)
-cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left ; same signature as the MMX variant: dst, src, w in registers, 'left' read through leftm
- mova m5, [pb_15] ; constant defined elsewhere; NOTE(review): name suggests all bytes = 15 (a byte-shuffle index for the top byte) - confirm
- mova m6, [pb_zzzzzzzz77777777] ; extra constant only loaded in this 16-byte variant; defined elsewhere, contents not visible here
- mova m4, [pb_zzzz3333zzzzbbbb] ; constant defined elsewhere, consumed by the loop macro
- mova m3, [pb_zz11zz55zz99zzdd] ; constant defined elsewhere, consumed by the loop macro
- movd m0, leftm ; m0 = 'left' argument in the low dword, remaining bits zeroed
- pslldq m0, 15 ; byte shift: only the low byte of 'left' survives, moved into the top byte (byte 15) of m0
- test srcq, 15 ; is src 16-byte aligned?
- jnz .src_unaligned ; no: take the last expansion below; dst alignment is not examined on this path
- test dstq, 15 ; src is aligned here; is dst 16-byte aligned too?
- jnz .dst_unaligned ; no: take the middle expansion below
- ADD_HFYU_LEFT_LOOP 1, 1 ; reached only when both pointers are 16-byte aligned; NOTE(review): each expansion presumably returns, otherwise it would fall into the next one - confirm in the macro
-.dst_unaligned: ; src aligned, dst not
- ADD_HFYU_LEFT_LOOP 0, 1 ; NOTE(review): from the branch layout the flags appear to be (dst aligned, src aligned) - confirm against the macro definition
-.src_unaligned: ; src not aligned, dst alignment unknown
- ADD_HFYU_LEFT_LOOP 0, 0 ; both flags cleared for this path
-
-%macro ADD_BYTES 0 ; emits add_bytes for the currently selected vector size: dst[i] += src[i] (byte-wise, wrapping) for w bytes
-cglobal add_bytes, 3,4,2, dst, src, w, size ; 3 args loaded (dst, src, w); 'size' names a 4th scratch GPR; 2 vector regs declared
- mov sizeq, wq ; NOTE(review): uses the full-width w register - if w is a 32-bit int in C, its upper half is assumed clean; confirm
- and sizeq, -2*mmsize ; size = w rounded down to a multiple of 2*mmsize = bytes handled by the vector loop
- jz .2 ; nothing for the vector loop: go straight to the byte tail
- add dstq, sizeq ; point dst at the end of the vector region
- add srcq, sizeq ; point src at the end of the vector region
- neg sizeq ; size becomes a negative index that counts up to 0
-.1: ; vector loop: two registers (2*mmsize bytes) per iteration
- mova m0, [srcq + sizeq] ; aligned load; NOTE(review): with 16-byte vectors this and the memory-operand paddb presumably need mmsize-aligned src/dst - confirm callers
- mova m1, [srcq + sizeq + mmsize] ; second vector of src
- paddb m0, [dstq + sizeq] ; per-byte wrapping add of dst onto the src vector
- paddb m1, [dstq + sizeq + mmsize] ; same for the second vector
- mova [dstq + sizeq], m0 ; store first sum back to dst
- mova [dstq + sizeq + mmsize], m1 ; store second sum back to dst
- add sizeq, 2*mmsize ; advance the index; flags from this add drive the branch
- jl .1 ; keep looping while the index is still negative
-.2: ; byte tail; dst/src already point past the vector region if it ran
- and wq, 2*mmsize-1 ; w = bytes left over after the vector loop
- jz .end ; no leftover bytes
- add dstq, wq ; point dst at the end of the tail
- add srcq, wq ; point src at the end of the tail
- neg wq ; w becomes a negative index that counts up to 0
-.3: ; scalar loop: one byte per iteration
- mov sizeb, [srcq + wq] ; low byte of the size register reused as scratch for the src byte
- add [dstq + wq], sizeb ; dst byte += src byte, in memory
- inc wq ; next byte; inc updates the sign/overflow flags tested below
- jl .3 ; keep looping while the index is still negative
-.end: ; common exit
- REP_RET ; x86inc return macro
-%endmacro ; end of ADD_BYTES
-