neg r2
pxor m4, m4
.loop:
- mova m0, [r1+r2]
+ mova xm0, [r1+r2]
+ mova xm1, [r1+r2+16]
%if mmsize==32
- movu m1, [r1+r2+8]
+ vinserti128 m0, m0, [r1+r2+ 8], 1
+ vinserti128 m1, m1, [r1+r2+24], 1
%else
- mova m1, [r1+r2+16]
palignr m1, m0, 8
%endif
mpsadbw m0, m4, 0
neg r2
pxor m4, m4
.loop:
- mova m0, [r1+r2]
+ mova xm0, [r1+r2]
+ mova xm1, [r1+r2+16]
%if mmsize==32
- movu m1, [r1+r2+8]
+ vinserti128 m0, m0, [r1+r2+ 8], 1
+ vinserti128 m1, m1, [r1+r2+24], 1
mpsadbw m2, m0, m4, 100100b
mpsadbw m3, m1, m4, 100100b
%else
- mova m1, [r1+r2+16]
palignr m1, m0, 8
mpsadbw m2, m0, m4, 100b
mpsadbw m3, m1, m4, 100b