;=============================================================================
; SAD_END_SSE2 (takes 0 macro arguments): final horizontal reduction + return.
; Copies the high quadword of m0 into the low quadword of m1, adds the two
; registers as packed words, and returns the low dword of m0 in eax.
; NOTE(review): the '-'/'+' prefixed pair below looks like patch residue
; (movhlps being replaced by MOVHL); only one of the two lines should remain
; in an assembled file. MOVHL is defined outside this excerpt -- confirm it
; performs the same high-to-low quadword move as movhlps.
; NOTE(review): the closing %endmacro is not visible in this excerpt.
%macro SAD_END_SSE2 0
- movhlps m1, m0
+ MOVHL m1, m0
paddw m0, m1
movd eax, m0
RET
; Loop tail: r2d is reduced by 2 per pass and the loop repeats while the
; result is still greater than zero (signed compare). The .loop label itself
; is outside this excerpt.
sub r2d, 2
jg .loop
.end:
; Horizontal reduction of the accumulator m0: bring its high quadword down
; into m1, then add the two halves.
; NOTE(review): the '-'/'+' prefixed pair looks like patch residue (movhlps
; vs. MOVHL); MOVHL is defined outside this excerpt -- confirm equivalence.
- movhlps m1, m0
+ MOVHL m1, m0
; max sum: 31*16*255 (pixel_max) = 126480, which does not fit in 16 bits,
; so the halves are added as dwords (paddd) rather than words.
paddd m0, m1
; Low dword of the reduced sum goes to eax.
movd eax, m0
; Accumulate xmm2, xmm3 and xmm4 into xmm1 with packed-word adds.
; NOTE(review): what each of xmm1..xmm4 holds on entry is not visible in this
; excerpt -- presumably partial SAD sums; confirm against the preceding code.
paddw xmm1, xmm2
paddw xmm1, xmm3
paddw xmm1, xmm4
; Fold the high quadword of xmm1 onto its low quadword (via xmm0).
; NOTE(review): the '-'/'+' prefixed pair looks like patch residue (movhlps
; vs. MOVHL); MOVHL is defined outside this excerpt -- confirm equivalence.
- movhlps xmm0, xmm1
+ MOVHL xmm0, xmm1
paddw xmm1, xmm0
; Store the low dword of the result at the address in r2.
movd [r2], xmm1
; NOTE(review): this fragment opens with an %else whose matching %if is not
; visible, and neither conditional is closed within the excerpt.
%else
%if mmsize==16
; 16-byte register path: shift m3 left by 4 bytes and OR m2 into it, so the
; combined register carries both values.
; NOTE(review): this only packs cleanly if m2 is zero above its low dword --
; confirm against the code that produces m2.
pslldq m3, 4
por m3, m2
; Fold the high quadword of m3 onto its low quadword, then store 8 bytes.
; NOTE(review): each '-'/'+' prefixed pair below looks like patch residue
; (movhlps vs. MOVHL); MOVHL is defined outside this excerpt.
- movhlps m1, m3
+ MOVHL m1, m3
paddw m3, m1
movq [r2+0], m3
; Same high-to-low fold for m4; its store is not visible in this excerpt.
- movhlps m1, m4
+ MOVHL m1, m4
paddw m4, m1
%else
; Other register sizes: store the low dword of m2 directly.
movd [r2+0], m2
; intra_sad_x3_16x16 -- only the head of the function is visible here.
; NOTE(review): the "3,5,6" after the name is presumably the x86inc cglobal
; argument/GPR/vector-register counts -- confirm against the cglobal macro.
cglobal intra_sad_x3_16x16, 3,5,6
; psadbw against an all-zero register yields, per 8-byte half, the plain sum
; of the 16 bytes loaded from [r1-FDEC_STRIDE].
pxor xm0, xm0
psadbw xm0, [r1-FDEC_STRIDE]
; Add the two half-sums together and move the total into r3d.
; NOTE(review): the '-'/'+' prefixed pair looks like patch residue (movhlps
; vs. MOVHL); MOVHL is defined outside this excerpt -- confirm equivalence.
- movhlps xm1, xm0
+ MOVHL xm1, xm0
paddw xm0, xm1
movd r3d, xm0
; NOTE(review): the lines below appear to come from later in the function;
; the code between them (including the .vloop label) is not visible here.
%assign x 0
; Step r3d by -FENC_STRIDE and branch back to .vloop while the result is
; non-negative (signed).
add r3d, -FENC_STRIDE
jge .vloop
; m5 = high quadword of m4 duplicated into both quadword slots (per 128-bit
; lane), so the paddw below folds high onto low within each lane of m4.
punpckhqdq m5, m4, m4
; Same fold for xm3, using xm2 as the temporary.
; NOTE(review): the '-'/'+' prefixed pair looks like patch residue (movhlps
; vs. MOVHL); MOVHL is defined outside this excerpt -- confirm equivalence.
- movhlps xm2, xm3
+ MOVHL xm2, xm3
paddw m4, m5 ; DC / V
paddw xm3, xm2 ; H
; Pull the upper 128 bits of m4 into xm2 (xm2's earlier temporary value has
; already been consumed by the H add above).
vextracti128 xm2, m4, 1
; r4d = half of macro parameter %2.
; NOTE(review): the enclosing %macro is not visible -- presumably %2 is a
; block height and r4d a loop counter consumed by the callee; confirm.
mov r4d, %2/2
; Clear xmm0 before the call.
; NOTE(review): presumably the callee accumulates into xmm0 -- confirm.
pxor xmm0, xmm0
; Indirect call through r5; the target routine is not visible here.
call r5
; Fold the high quadword of xmm0 onto its low quadword and return the low
; dword of the result in eax.
; NOTE(review): the '-'/'+' prefixed pair looks like patch residue (movhlps
; vs. MOVHL); MOVHL is defined outside this excerpt -- confirm equivalence.
- movhlps xmm1, xmm0
+ MOVHL xmm1, xmm0
paddw xmm0, xmm1
movd eax, xmm0
RET