"sub $2, %0 \n\t"
" jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2)
-- : "r" ((long)stride)
-+ : "r" ((long)stride), "m" (bone)
+- : "r" ((x86_reg)stride)
++ : "r" ((x86_reg)stride), "m" (bone)
);
}
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
-- : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
-+ : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
+- : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride)
++ : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride), "m" (round_tab[0])
);
}
-
-Index: libavcodec/i386/dsputil_h264_template_mmx.c
-===================================================================
---- libavcodec/i386/dsputil_h264_template_mmx.c (revision 11270)
-+++ libavcodec/i386/dsputil_h264_template_mmx.c (working copy)
-@@ -188,8 +188,8 @@
- "pxor %%mm7, %%mm7 \n\t"
- "movd %5, %%mm2 \n\t"
- "movd %6, %%mm3 \n\t"
-- "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
-- "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
-+ "movq %7, %%mm4\n\t"
-+ "movq %7, %%mm5\n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm3, %%mm3 \n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
-@@ -246,7 +246,7 @@
- "sub $2, %2 \n\t"
- "jnz 1b \n\t"
- : "+r"(dst), "+r"(src), "+r"(h)
-- : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
-+ : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
- );
- }
-
-Index: libavcodec/i386/dsputil_mmx.c
-===================================================================
---- libavcodec/i386/dsputil_mmx.c (revision 11270)
-+++ libavcodec/i386/dsputil_mmx.c (working copy)
-@@ -1920,7 +1920,7 @@
-
- #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
- "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
-- "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
-+ "movq "#pw_20", %%mm4 \n\t" /* 20 */\
- "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
- "movq "#in7", " #m3 " \n\t" /* d */\
- "movq "#in0", %%mm5 \n\t" /* D */\
-@@ -1932,7 +1932,7 @@
- "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
- "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
- "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
-+ "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\
- "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
- "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
- "psraw $5, %%mm5 \n\t"\
-@@ -1966,10 +1966,10 @@
- "paddw %%mm5, %%mm5 \n\t" /* 2b */\
- "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
- "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
-+ "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
- "paddw %%mm4, %%mm0 \n\t" /* a */\
- "paddw %%mm1, %%mm5 \n\t" /* d */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
-+ "pmullw %7, %%mm0 \n\t" /* 20a */\
- "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
- "paddw %6, %%mm6 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
-@@ -1992,10 +1992,10 @@
- "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
- "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
-+ "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
- "paddw %%mm2, %%mm1 \n\t" /* a */\
- "paddw %%mm6, %%mm4 \n\t" /* d */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
-+ "pmullw %7, %%mm1 \n\t" /* 20a */\
- "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
- "paddw %6, %%mm1 \n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
-@@ -2018,7 +2018,7 @@
- "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
- "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
- "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
-+ "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\
- "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
- "paddw %%mm3, %%mm2 \n\t" /* d */\
- "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
-@@ -2026,7 +2026,7 @@
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
- "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
- "paddw %%mm2, %%mm6 \n\t" /* a */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
-+ "pmullw %7, %%mm6 \n\t" /* 20a */\
- "paddw %6, %%mm0 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm0 \n\t"\
-@@ -2041,8 +2041,8 @@
- "paddw %%mm2, %%mm5 \n\t" /* d */\
- "paddw %%mm6, %%mm6 \n\t" /* 2b */\
- "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
-+ "pmullw %7, %%mm3 \n\t" /* 20a */\
-+ "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\
- "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
- "paddw %6, %%mm4 \n\t"\
- "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
-@@ -2055,7 +2055,9 @@
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+m"(h)\
-- : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
-+ : "d"((long)srcStride), "S"((long)dstStride),\
-+ "m"(temp), "m"(ROUNDER),\
-+ "m"(ff_pw_20), "m"(ff_pw_3)\
- : "memory"\
- );\
- }\
-@@ -2133,10 +2135,10 @@
- "paddw %%mm5, %%mm5 \n\t" /* 2b */\
- "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
- "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
-+ "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
- "paddw %%mm4, %%mm0 \n\t" /* a */\
- "paddw %%mm1, %%mm5 \n\t" /* d */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
-+ "pmullw %7, %%mm0 \n\t" /* 20a */\
- "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
- "paddw %6, %%mm6 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
-@@ -2154,8 +2156,8 @@
- "paddw %%mm5, %%mm4 \n\t" /* d */\
- "paddw %%mm2, %%mm2 \n\t" /* 2b */\
- "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
-- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
-- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
-+ "pmullw %7, %%mm1 \n\t" /* 20a */\
-+ "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
- "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
- "paddw %6, %%mm1 \n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
-@@ -2168,7 +2170,9 @@
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+m"(h)\
-- : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
-+ : "S"((long)srcStride), "D"((long)dstStride),\
-+ "m"(temp), "m"(ROUNDER),\
-+ "m"(ff_pw_20), "m"(ff_pw_3)\
- : "memory"\
- );\
- }\
-@@ -2247,31 +2251,31 @@
- "movq 8(%0), %%mm1 \n\t"\
- "movq 16(%0), %%mm2 \n\t"\
- "movq 24(%0), %%mm3 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
- \
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
- \
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
- "add %4, %1 \n\t" \
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
- \
- "add $136, %0 \n\t"\
- "add %6, %1 \n\t"\
-@@ -2279,7 +2283,9 @@
- " jnz 1b \n\t"\
- \
- : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
-+ : "r"((long)dstStride), "r"(2*(long)dstStride),\
-+ "m"(ROUNDER), "g"(4-14*(long)dstStride),\
-+ "m"(ff_pw_20), "m"(ff_pw_3)\
- :"memory"\
- );\
- }\
-@@ -2319,19 +2325,19 @@
- "movq 8(%0), %%mm1 \n\t"\
- "movq 16(%0), %%mm2 \n\t"\
- "movq 24(%0), %%mm3 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
- \
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
- \
-- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
-- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
-- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
-+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
-+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
- \
- "add $72, %0 \n\t"\
- "add %6, %1 \n\t"\
-@@ -2339,7 +2345,9 @@
- " jnz 1b \n\t"\
- \
- : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
-+ : "r"((long)dstStride), "r"(2*(long)dstStride),\
-+ "m"(ROUNDER), "g"(4-6*(long)dstStride),\
-+ "m"(ff_pw_20), "m"(ff_pw_3)\
- : "memory"\
- );\
- }\
+
Index: libavcodec/i386/simple_idct_mmx.c
===================================================================
--- libavcodec/i386/simple_idct_mmx.c (revision 11270)
}
Index: libavcodec/i386/cavsdsp_mmx.c
===================================================================
---- libavcodec/i386/cavsdsp_mmx.c (revision 11270)
+--- libavcodec/i386/cavsdsp_mmx.c (revision 13752)
+++ libavcodec/i386/cavsdsp_mmx.c (working copy)
-@@ -23,9 +23,15 @@
- */
-
- #include "dsputil.h"
+@@ -25,8 +25,30 @@
+ #include "libavutil/common.h"
+ #include "libavutil/x86_cpu.h"
+ #include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
- #include "common.h"
++#define SUMSUB_BA( a, b ) \
++"paddw "#b", "#a" \n\t"\
++"paddw "#b", "#b" \n\t"\
++"psubw "#a", "#b" \n\t"
++
++#define SBUTTERFLY(a,b,t,n,m)\
++"mov" #m " " #a ", " #t " \n\t" /* abcd */\
++"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
++"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
++
++#define TRANSPOSE4(a,b,c,d,t)\
++SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
++SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
++SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
++SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
++
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
* inverse transform
Index: libavcodec/i386/flacdsp_mmx.c
===================================================================
---- libavcodec/i386/flacdsp_mmx.c (revision 11270)
+--- libavcodec/i386/flacdsp_mmx.c (revision 11888)
+++ libavcodec/i386/flacdsp_mmx.c (working copy)
@@ -26,7 +26,6 @@
double c = 2.0 / (len-1.0);
int n2 = len>>1;
- long i = -n2*sizeof(int32_t);
-- long j = n2*sizeof(int32_t);
+ x86_reg i = -n2*sizeof(int32_t);
+- x86_reg j = n2*sizeof(int32_t);
asm volatile(
"movsd %0, %%xmm7 \n\t"
"movapd %1, %%xmm6 \n\t"
-@@ -44,17 +43,18 @@
- "movapd %%xmm6, %%xmm0 \n\t"\
- "subpd %%xmm1, %%xmm0 \n\t"\
- "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
-- "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
-- "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
-+ "cvtpi2pd (%3,%0), %%xmm2 \n\t"\
- "mulpd %%xmm0, %%xmm2 \n\t"\
-+ "movapd %%xmm2, (%1,%0,2) \n\t"\
-+ "negl %0\n\t"\
-+ "cvtpi2pd (%4,%0), %%xmm3 \n\t"\
- "mulpd %%xmm1, %%xmm3 \n\t"\
-- "movapd %%xmm2, (%2,%0,2) \n\t"\
-- MOVPD" %%xmm3, (%3,%1,2) \n\t"\
-+ MOVPD" %%xmm3, (%2,%0,2) \n\t"\
- "subpd %%xmm5, %%xmm7 \n\t"\
-- "sub $8, %1 \n\t"\
-+ "negl %0\n\t"\
+@@ -54,7 +53,7 @@
+ "sub $8, %1 \n\t"\
"add $8, %0 \n\t"\
"jl 1b \n\t"\
- :"+&r"(i), "+&r"(j)\
+ :"+&r"(i)\
- :"r"(w_data+n2), "r"(w_data+len-2-n2),\
- "r"(data+n2), "r"(data+len-2-n2)\
+ :"r"(w_data+n2), "r"(data+n2)\
);
-@@ -85,9 +85,11 @@
- long i = -len*sizeof(double);
+ if(len&1)
+@@ -84,9 +83,11 @@
+ x86_reg i = -len*sizeof(double);
if(j == lag-2) {
asm volatile(
- "movsd %6, %%xmm0 \n\t"
"1: \n\t"
"movapd (%4,%0), %%xmm3 \n\t"
"movupd -8(%5,%0), %%xmm4 \n\t"
-@@ -110,12 +112,14 @@
+@@ -109,12 +110,14 @@
"movsd %%xmm1, %2 \n\t"
"movsd %%xmm2, %3 \n\t"
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
"1: \n\t"
"movapd (%3,%0), %%xmm3 \n\t"
"movupd -8(%4,%0), %%xmm4 \n\t"
-@@ -132,7 +136,7 @@
+@@ -131,7 +134,7 @@
"movsd %%xmm0, %1 \n\t"
"movsd %%xmm1, %2 \n\t"
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])