-
-; void add_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *top, const uint16_t *diff, int mask, int w, int *left, int *left_top) ; 16-bit elements inferred from the word ops below - confirm exact C types against the prototype
-INIT_MMX mmxext ; x86inc macro (defined outside this chunk): MMX registers, mmxext flavour
-cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top ; dst[i] = (median(l, t, l+t-tl) + diff[i]) & mask, where l is the previous output
- add wd, wd ; w *= 2: element count -> byte count (2 bytes per element)
- movd mm6, maskd ; mm6 = mask in the low dword
- SPLATW mm6, mm6 ; presumably broadcasts the low word to all 4 words (macro defined elsewhere)
- movq mm0, [topq] ; mm0 = top[0..3]
- movq mm2, mm0
- movd mm4, [left_topq] ; 32-bit load of *left_top; assumes the value fits in 16 bits - TODO confirm
- psllq mm2, 16 ; mm2 = {0, top[0], top[1], top[2]}
- movq mm1, mm0 ; mm1 = t
- por mm4, mm2 ; mm4 = tl = {*left_top, top[0], top[1], top[2]}
- movd mm3, [leftq] ; mm3 word 0 = l, the running left value
- psubw mm0, mm4 ; mm0 = t-tl
- add dstq, wq ; point all three row pointers at the end of the row ...
- add topq, wq
- add diffq, wq
- neg wq ; ... and index them with a negative byte offset counting up to 0
- jmp .skip ; t and t-tl for the first group were already built above
-.loop:
- movq mm4, [topq+wq] ; next 4 top words
- movq mm0, mm4
- psllq mm4, 16 ; make room in word 0 for the carried-in top-left
- por mm4, mm1 ; mm1 was shifted down to the previous group's last top word, giving tl
- movq mm1, mm0 ; t
- psubw mm0, mm4 ; t-tl
-.skip:
- movq mm2, [diffq+wq] ; mm2 = 4 residual words
-%assign i 0
-%rep 4 ; one element per pass: each output is the next element's l, so the 4 lanes run serially in word 0
- movq mm4, mm0
- paddw mm4, mm3 ; t-tl+l
- pand mm4, mm6 ; gradient wrapped to the mask
- movq mm5, mm3 ; keep a copy of l
- pmaxsw mm3, mm1 ; max(l, t), signed word compare
- pminsw mm5, mm1 ; min(l, t)
- pminsw mm3, mm4 ; min(max(l, t), gradient)
- pmaxsw mm3, mm5 ; median of (l, t, gradient)
- paddw mm3, mm2 ; +residual
- pand mm3, mm6 ; masked output; word 0 is l for the next pass
-%if i==0
- movq mm7, mm3
- psllq mm7, 48 ; first output parked in the top word of mm7
-%else
- movq mm4, mm3
- psrlq mm7, 16 ; slide earlier outputs down one word
- psllq mm4, 48
- por mm7, mm4 ; newest output goes into the top word
-%endif
-%if i<3
- psrlq mm0, 16 ; bring the next lane of t-tl, t and residual into word 0
- psrlq mm1, 16
- psrlq mm2, 16
-%endif
-%assign i i+1
-%endrep
- movq [dstq+wq], mm7 ; after 4 passes mm7 holds the 4 outputs in memory order
- add wq, 8 ; 4 elements per iteration; w presumably a multiple of 4 or buffers padded - confirm with callers
- jl .loop ; loop until the negative offset reaches 0
- movzx r2d, word [dstq-2] ; last output word; r2 is the third argument register (diff), no longer needed
- mov [leftq], r2d ; *left = last output
- movzx r2d, word [topq-2] ; last top word
- mov [left_topq], r2d ; *left_top = last top
- RET
-
-cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top ; dst[i] = (src2[i] - median(l, t, l+t-tl)) & mask on 16-bit words; t/tl come from src1, l from src2
- add wd, wd ; w *= 2: element count -> byte count
- movd mm7, maskd ; mm7 = mask in the low dword
- SPLATW mm7, mm7 ; presumably broadcasts the low word to all 4 words (macro defined elsewhere)
- movq mm0, [src1q] ; src1[0..3]
- movq mm2, [src2q] ; src2[0..3]
- psllq mm0, 16 ; shift both up one word to open word 0
- psllq mm2, 16
- movd mm6, [left_topq] ; 32-bit load; assumes *left_top fits in 16 bits - TODO confirm
- por mm0, mm6 ; mm0 = tl = {*left_top, src1[0], src1[1], src1[2]}
- movd mm6, [leftq] ; 32-bit load; same 16-bit assumption for *left
- por mm2, mm6 ; mm2 = l = {*left, src2[0], src2[1], src2[2]}
- xor maskq, maskq ; mask now lives in mm7; its GPR becomes the byte offset, starting at 0
-.loop:
- movq mm1, [src1q + maskq] ; t
- movq mm3, [src2q + maskq] ; current elements
- movq mm4, mm2 ; l
- psubw mm2, mm0 ; l - tl
- paddw mm2, mm1 ; l - tl + t
- pand mm2, mm7 ; gradient wrapped to the mask
- movq mm5, mm4 ; second copy of l
- pmaxsw mm4, mm1 ; max(l, t), signed word compare
- pminsw mm1, mm5 ; min(l, t)
- pminsw mm4, mm2 ; min(max(l, t), gradient)
- pmaxsw mm4, mm1 ; median of (l, t, gradient) = prediction
- psubw mm3, mm4 ; current - prediction
- pand mm3, mm7 ; masked residual
- movq [dstq + maskq], mm3 ; store 4 residuals
- add maskq, 8 ; advance 4 elements
- movq mm0, [src1q + maskq - 2] ; next tl: src1 one word back; NOTE(review): on the final pass this reads past the processed range - buffers presumably padded
- movq mm2, [src2q + maskq - 2] ; next l: src2 one word back; same look-ahead read
- cmp maskq, wq
- jb .loop ; unsigned compare of byte offset against byte count; body runs at least once
- movzx maskd, word [src1q + wq - 2] ; last src1 word
- mov [left_topq], maskd ; *left_top = last src1 element
- movzx maskd, word [src2q + wq - 2] ; last src2 word
- mov [leftq], maskd ; *left = last src2 element
- RET