-Index: ffmpeg/libavcodec/i386/dsputil_mmx.c
+Index: libavcodec/i386/motion_est_mmx.c
===================================================================
---- ffmpeg/libavcodec/i386/dsputil_mmx.c (revision 10759)
-+++ ffmpeg/libavcodec/i386/dsputil_mmx.c (working copy)
-@@ -2978,6 +2978,7 @@
- ::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2)
+--- libavcodec/i386/motion_est_mmx.c (revision 10865)
++++ libavcodec/i386/motion_est_mmx.c (working copy)
+@@ -167,7 +167,7 @@
+ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+ {
+ asm volatile(
+- "movq "MANGLE(bone)", %%mm5 \n\t"
++ "movq %4, %%mm5 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "pavgb 1(%1), %%mm0 \n\t"
+ "add %3, %1 \n\t"
+@@ -190,7 +190,7 @@
+ "sub $2, %0 \n\t"
+ " jg 1b \n\t"
+ : "+r" (h), "+r" (blk1), "+r" (blk2)
+- : "r" ((long)stride)
++ : "r" ((long)stride), "m" (bone)
);
- #define WELCH(MOVPD)\
-+ do {\
- asm volatile(\
- "1: \n\t"\
- "movapd %%xmm7, %%xmm1 \n\t"\
-@@ -2985,8 +2986,12 @@
+ }
+
+@@ -258,7 +258,7 @@
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddw %%mm4, %%mm2 \n\t"
+ "paddw %%mm5, %%mm3 \n\t"
+- "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
++ "movq 16+%5, %%mm5 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+ "paddw %%mm3, %%mm1 \n\t"
+ "paddw %%mm5, %%mm0 \n\t"
+@@ -281,7 +281,7 @@
+ "add %4, %%"REG_a" \n\t"
+ " js 1b \n\t"
+ : "+a" (len)
+- : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
++ : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
+ );
+ }
+
+Index: libavcodec/i386/dsputil_h264_template_mmx.c
+===================================================================
+--- libavcodec/i386/dsputil_h264_template_mmx.c (revision 10865)
++++ libavcodec/i386/dsputil_h264_template_mmx.c (working copy)
+@@ -188,8 +188,8 @@
+ "pxor %%mm7, %%mm7 \n\t"
+ "movd %5, %%mm2 \n\t"
+ "movd %6, %%mm3 \n\t"
+- "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
+- "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
++ "movq %7, %%mm4\n\t"
++ "movq %7, %%mm5\n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+ "punpcklwd %%mm3, %%mm3 \n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+@@ -246,7 +246,7 @@
+ "sub $2, %2 \n\t"
+ "jnz 1b \n\t"
+ : "+r"(dst), "+r"(src), "+r"(h)
+- : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
++ : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
+ );
+ }
+
+Index: libavcodec/i386/dsputil_mmx.c
+===================================================================
+--- libavcodec/i386/dsputil_mmx.c (revision 10865)
++++ libavcodec/i386/dsputil_mmx.c (working copy)
+@@ -1917,7 +1917,7 @@
+
+ #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
+ "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
+- "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
++ "movq "#pw_20", %%mm4 \n\t" /* 20 */\
+ "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
+ "movq "#in7", " #m3 " \n\t" /* d */\
+ "movq "#in0", %%mm5 \n\t" /* D */\
+@@ -1929,7 +1929,7 @@
+ "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
+ "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
+ "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
++ "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\
+ "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
+ "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
+ "psraw $5, %%mm5 \n\t"\
+@@ -1963,10 +1963,10 @@
+ "paddw %%mm5, %%mm5 \n\t" /* 2b */\
+ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
+ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
++ "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
+ "paddw %%mm4, %%mm0 \n\t" /* a */\
+ "paddw %%mm1, %%mm5 \n\t" /* d */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
++ "pmullw %7, %%mm0 \n\t" /* 20a */\
+ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
+ "paddw %6, %%mm6 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+@@ -1989,10 +1989,10 @@
+ "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
+ "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
+ "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
++ "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
+ "paddw %%mm2, %%mm1 \n\t" /* a */\
+ "paddw %%mm6, %%mm4 \n\t" /* d */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
++ "pmullw %7, %%mm1 \n\t" /* 20a */\
+ "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
+ "paddw %6, %%mm1 \n\t"\
+ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
+@@ -2015,7 +2015,7 @@
+ "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
+ "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
+ "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
++ "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\
+ "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
+ "paddw %%mm3, %%mm2 \n\t" /* d */\
+ "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
+@@ -2023,7 +2023,7 @@
+ "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
+ "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
+ "paddw %%mm2, %%mm6 \n\t" /* a */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
++ "pmullw %7, %%mm6 \n\t" /* 20a */\
+ "paddw %6, %%mm0 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm0 \n\t"\
+@@ -2038,8 +2038,8 @@
+ "paddw %%mm2, %%mm5 \n\t" /* d */\
+ "paddw %%mm6, %%mm6 \n\t" /* 2b */\
+ "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
++ "pmullw %7, %%mm3 \n\t" /* 20a */\
++ "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\
+ "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
+ "paddw %6, %%mm4 \n\t"\
+ "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
+@@ -2052,7 +2052,9 @@
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+m"(h)\
+- : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
++ : "d"((long)srcStride), "S"((long)dstStride),\
++ "m"(temp), "m"(ROUNDER),\
++ "m"(ff_pw_20), "m"(ff_pw_3)\
+ : "memory"\
+ );\
+ }\
+@@ -2130,10 +2132,10 @@
+ "paddw %%mm5, %%mm5 \n\t" /* 2b */\
+ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
+ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
++ "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
+ "paddw %%mm4, %%mm0 \n\t" /* a */\
+ "paddw %%mm1, %%mm5 \n\t" /* d */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
++ "pmullw %7, %%mm0 \n\t" /* 20a */\
+ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
+ "paddw %6, %%mm6 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+@@ -2151,8 +2153,8 @@
+ "paddw %%mm5, %%mm4 \n\t" /* d */\
+ "paddw %%mm2, %%mm2 \n\t" /* 2b */\
+ "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
+- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
++ "pmullw %7, %%mm1 \n\t" /* 20a */\
++ "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
+ "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
+ "paddw %6, %%mm1 \n\t"\
+ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
+@@ -2165,7 +2167,9 @@
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+m"(h)\
+- : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
++ : "S"((long)srcStride), "D"((long)dstStride),\
++ "m"(temp), "m"(ROUNDER),\
++ "m"(ff_pw_20), "m"(ff_pw_3)\
+ : "memory"\
+ );\
+ }\
+@@ -2244,31 +2248,31 @@
+ "movq 8(%0), %%mm1 \n\t"\
+ "movq 16(%0), %%mm2 \n\t"\
+ "movq 24(%0), %%mm3 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+ \
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
+ \
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t" \
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
+ \
+ "add $136, %0 \n\t"\
+ "add %6, %1 \n\t"\
+@@ -2276,7 +2280,9 @@
+ " jnz 1b \n\t"\
+ \
+ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
++ : "r"((long)dstStride), "r"(2*(long)dstStride),\
++ "m"(ROUNDER), "g"(4-14*(long)dstStride),\
++ "m"(ff_pw_20), "m"(ff_pw_3)\
+ :"memory"\
+ );\
+ }\
+@@ -2316,19 +2322,19 @@
+ "movq 8(%0), %%mm1 \n\t"\
+ "movq 16(%0), %%mm2 \n\t"\
+ "movq 24(%0), %%mm3 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+ \
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+ \
+- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
+- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
++ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
++ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
+ \
+ "add $72, %0 \n\t"\
+ "add %6, %1 \n\t"\
+@@ -2336,7 +2342,9 @@
+ " jnz 1b \n\t"\
+ \
+ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
++ : "r"((long)dstStride), "r"(2*(long)dstStride),\
++ "m"(ROUNDER), "g"(4-6*(long)dstStride),\
++ "m"(ff_pw_20), "m"(ff_pw_3)\
+ : "memory"\
+ );\
+ }\
+@@ -2967,7 +2975,6 @@
+ double c = 2.0 / (len-1.0);
+ int n2 = len>>1;
+ long i = -n2*sizeof(int32_t);
+- long j = n2*sizeof(int32_t);
+ asm volatile(
+ "movsd %0, %%xmm7 \n\t"
+ "movapd %1, %%xmm6 \n\t"
+@@ -2985,17 +2992,18 @@
"movapd %%xmm6, %%xmm0 \n\t"\
"subpd %%xmm1, %%xmm0 \n\t"\
"pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
- "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
- "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
-+ "cvtpi2pd (%2,%0), %%xmm2 \n\t"\
-+ "cvtpi2pd (%3,%1), %%xmm3 \n\t"\
-+ :"+&r"(i), "+&r"(j)\
-+ :"r"(data+n2), "r"(data+len-2-n2)\
-+ );\
-+ asm volatile(\
++ "cvtpi2pd (%3,%0), %%xmm2 \n\t"\
"mulpd %%xmm0, %%xmm2 \n\t"\
++ "movapd %%xmm2, (%1,%0,2) \n\t"\
++ "negl %0\n\t"\
++ "cvtpi2pd (%4,%0), %%xmm3 \n\t"\
"mulpd %%xmm1, %%xmm3 \n\t"\
- "movapd %%xmm2, (%2,%0,2) \n\t"\
-@@ -2996,9 +3001,9 @@
+- "movapd %%xmm2, (%2,%0,2) \n\t"\
+- MOVPD" %%xmm3, (%3,%1,2) \n\t"\
++ MOVPD" %%xmm3, (%2,%0,2) \n\t"\
+ "subpd %%xmm5, %%xmm7 \n\t"\
+- "sub $8, %1 \n\t"\
++ "negl %0\n\t"\
"add $8, %0 \n\t"\
"jl 1b \n\t"\
- :"+&r"(i), "+&r"(j)\
-- :"r"(w_data+n2), "r"(w_data+len-2-n2),\
-- "r"(data+n2), "r"(data+len-2-n2)\
-- );
-+ :"r"(w_data+n2), "r"(w_data+len-2-n2)\
-+ );\
-+ } while(0);
- if(len&1)
- WELCH("movupd")
- else
-
+- :"+&r"(i), "+&r"(j)\
++ :"+&r"(i)\
+ :"r"(w_data+n2), "r"(w_data+len-2-n2),\
+ "r"(data+n2), "r"(data+len-2-n2)\
+ );
+Index: libavcodec/i386/h264dsp_mmx.c
+===================================================================
+--- libavcodec/i386/h264dsp_mmx.c (revision 10865)
++++ libavcodec/i386/h264dsp_mmx.c (working copy)
+@@ -341,21 +341,21 @@
+ // in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
+ // out: mm1=p0' mm2=q0'
+ // clobbers: mm0,3-6
+-#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
++#define H264_DEBLOCK_P0_Q0(pb_01, pb_3, pb_a1)\
+ "movq %%mm1 , %%mm5 \n\t"\
+ "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\
+ "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\
+ "pcmpeqb %%mm4 , %%mm4 \n\t"\
+ "pxor %%mm4 , %%mm3 \n\t"\
+ "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
+- "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
++ "pavgb "#pb_3" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
+ "pxor %%mm1 , %%mm4 \n\t"\
+ "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
+ "pavgb %%mm5 , %%mm3 \n\t"\
+ "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\
+- "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
++ "movq "#pb_a1" , %%mm6 \n\t"\
+ "psubusb %%mm3 , %%mm6 \n\t"\
+- "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
++ "psubusb "#pb_a1" , %%mm3 \n\t"\
+ "pminub %%mm7 , %%mm6 \n\t"\
+ "pminub %%mm7 , %%mm3 \n\t"\
+ "psubusb %%mm6 , %%mm1 \n\t"\
+@@ -422,14 +422,14 @@
+ H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
+
+ /* filter p0, q0 */
+- H264_DEBLOCK_P0_Q0(%8, unused)
++ H264_DEBLOCK_P0_Q0(%8, %9, %10)
+ "movq %%mm1, (%1,%3,2) \n\t"
+ "movq %%mm2, (%2) \n\t"
+
+ : "=m"(*tmp0)
+ : "r"(pix-3*stride), "r"(pix), "r"((long)stride),
+ "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
+- "m"(mm_bone)
++ "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1)
+ );
+ }
+
+@@ -470,13 +470,13 @@
+ "movd %3, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm6 \n\t"
+ "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask
+- H264_DEBLOCK_P0_Q0(%6, %7)
++ H264_DEBLOCK_P0_Q0(%6, %7, %8)
+ "movq %%mm1, (%0,%2) \n\t"
+ "movq %%mm2, (%1) \n\t"
+
+ :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
+ "r"(*(uint32_t*)tc0),
+- "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F)
++ "m"(alpha1), "m"(beta1), "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1)
+ );
+ }
+
+Index: libavcodec/i386/simple_idct_mmx.c
+===================================================================
+--- libavcodec/i386/simple_idct_mmx.c (revision 10865)
++++ libavcodec/i386/simple_idct_mmx.c (working copy)
+@@ -363,7 +363,7 @@
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+- "movq "MANGLE(wm1010)", %%mm4 \n\t"\
++ "movq %3, %%mm4 \n\t"\
+ "pand %%mm0, %%mm4 \n\t"\
+ "por %%mm1, %%mm4 \n\t"\
+ "por %%mm2, %%mm4 \n\t"\
+@@ -437,7 +437,7 @@
+ "jmp 2f \n\t"\
+ "1: \n\t"\
+ "pslld $16, %%mm0 \n\t"\
+- "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
++ "#paddd %4, %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+@@ -471,7 +471,7 @@
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+- "movq "MANGLE(wm1010)", %%mm4 \n\t"\
++ "movq %3, %%mm4 \n\t"\
+ "pand %%mm0, %%mm4 \n\t"\
+ "por %%mm1, %%mm4 \n\t"\
+ "por %%mm2, %%mm4 \n\t"\
+@@ -545,7 +545,7 @@
+ "jmp 2f \n\t"\
+ "1: \n\t"\
+ "pslld $16, %%mm0 \n\t"\
+- "paddd "MANGLE(d40000)", %%mm0 \n\t"\
++ "paddd %4, %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+@@ -1270,7 +1270,7 @@
+ */
+
+ "9: \n\t"
+- :: "r" (block), "r" (temp), "r" (coeffs)
++ :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
+ : "%eax"
+ );
+ }