X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fx86%2Fdsputil_yasm.asm;h=7ddc770a2eedc884172a4ac782609ae4c1e8e9aa;hb=bf807a5e874442aa3fe1b475459cdd509e34bff4;hp=8e8c10c189df01180a53614592e6c2409bfa4c48;hpb=4e04e1b81e4e31b7a11b3b5033ac97d2da3b866d;p=ffmpeg diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 8e8c10c189d..7ddc770a2ee 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -35,13 +35,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 SECTION_TEXT %macro SCALARPRODUCT 1 -; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift) -cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift +; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order) +cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order shl orderq, 1 add v1q, orderq add v2q, orderq neg orderq - movd m3, shiftm pxor m2, m2 .loop: movu m0, [v1q + orderq] @@ -55,10 +54,8 @@ cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift %if mmsize == 16 movhlps m0, m2 paddd m2, m0 - psrad m2, m3 pshuflw m0, m2, 0x4e %else - psrad m2, m3 pshufw m0, m2, 0x4e %endif paddd m2, m0