git.sesse.net Git - ffmpeg/commitdiff
sws/x86: improve rounding for yuv2yuvX
author Michael Niedermayer <michaelni@gmx.at>
Fri, 15 Feb 2013 20:08:51 +0000 (21:08 +0100)
committer Michael Niedermayer <michaelni@gmx.at>
Fri, 15 Feb 2013 22:33:04 +0000 (23:33 +0100)
This tries to compensate for the errors introduced by
the rounding of pmulhw

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
libswscale/x86/swscale.c
libswscale/x86/swscale_template.c

index 02c454e08f9d0280a8134b68cdf0967b4cf5b15a..2f67b1b03f86d699960fad06da7d3d360fc804a3 100644 (file)
@@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                          :: "r"(dither)
                          );
     }
+    filterSize--;
     __asm__ volatile(
         "pxor      %%xmm0, %%xmm0\n\t"
         "punpcklbw %%xmm0, %%xmm3\n\t"
-        "psraw        $4, %%xmm3\n\t"
+        "movd          %0, %%xmm1\n\t"
+        "punpcklwd %%xmm1, %%xmm1\n\t"
+        "punpckldq %%xmm1, %%xmm1\n\t"
+        "punpcklqdq %%xmm1, %%xmm1\n\t"
+        "psllw         $3, %%xmm1\n\t"
+        "paddw     %%xmm1, %%xmm3\n\t"
+        "psraw         $4, %%xmm3\n\t"
+        ::"m"(filterSize)
+     );
+    __asm__ volatile(
         "movdqa    %%xmm3, %%xmm4\n\t"
         "movdqa    %%xmm3, %%xmm7\n\t"
         "movl %3, %%ecx\n\t"
index 62265db30ff5b17088ed299ef5b20311a6750eb3..f2567c1d8b5de20369c4d716a0fd8236ba56dea5 100644 (file)
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
                            const uint8_t *dither, int offset)
 {
     dither_8to16(dither, offset);
-    __asm__ volatile(\
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw        $3, %%mm1\n\t"
+        "paddw     %%mm1, %%mm3\n\t"
+        "paddw     %%mm1, %%mm4\n\t"
         "psraw        $4, %%mm3\n\t"
         "psraw        $4, %%mm4\n\t"
+        ::"m"(filterSize)
+     );
+
+    __asm__ volatile(\
         "movq    %%mm3, %%mm6\n\t"
         "movq    %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"