Index: libavcodec/i386/motion_est_mmx.c
===================================================================
--- libavcodec/i386/motion_est_mmx.c    (revision 11270)
+++ libavcodec/i386/motion_est_mmx.c    (working copy)
 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
-        "movq "MANGLE(bone)", %%mm5 \n\t"
+        "movq %4, %%mm5 \n\t"
         "movq (%1), %%mm0 \n\t"
         "pavgb 1(%1), %%mm0 \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((long)stride)
+        : "r" ((long)stride), "m" (bone)
         "punpckhbw %%mm7, %%mm5 \n\t"
         "paddw %%mm4, %%mm2 \n\t"
         "paddw %%mm5, %%mm3 \n\t"
-        "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
+        "movq 16+%5, %%mm5 \n\t"
         "paddw %%mm2, %%mm0 \n\t"
         "paddw %%mm3, %%mm1 \n\t"
         "paddw %%mm5, %%mm0 \n\t"
         "add %4, %%"REG_a" \n\t"
-        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
+        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
Index: libavcodec/i386/dsputil_h264_template_mmx.c
===================================================================
--- libavcodec/i386/dsputil_h264_template_mmx.c    (revision 11834)
+++ libavcodec/i386/dsputil_h264_template_mmx.c    (working copy)
         "pxor %%mm7, %%mm7 \n\t"
-        "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
-        "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
+        "movq %7, %%mm4\n\t"
+        "movq %7, %%mm5\n\t"
         "punpcklwd %%mm2, %%mm2 \n\t"
         "punpcklwd %%mm3, %%mm3 \n\t"
         "punpcklwd %%mm2, %%mm2 \n\t"
         : "+r"(dst), "+r"(src), "+r"(h)
-        : "r"((long)stride), "m"(*ff_pw_32), "m"(x), "m"(y)
+        : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
Index: libavcodec/i386/dsputil_mmx.c
===================================================================
--- libavcodec/i386/dsputil_mmx.c    (revision 11270)
+++ libavcodec/i386/dsputil_mmx.c    (working copy)
 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
     "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
-    "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
+    "movq "#pw_20", %%mm4 \n\t" /* 20 */\
     "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
     "movq "#in7", " #m3 " \n\t" /* d */\
     "movq "#in0", %%mm5 \n\t" /* D */\
     "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
     "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
     "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
-    "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
+    "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\
     "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
     "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
     "psraw $5, %%mm5 \n\t"\
@@ -1966,10 +1966,10 @@
         "paddw %%mm5, %%mm5 \n\t" /* 2b */\
         "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
         "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
+        "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
         "paddw %%mm4, %%mm0 \n\t" /* a */\
         "paddw %%mm1, %%mm5 \n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "pmullw %7, %%mm0 \n\t" /* 20a */\
         "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
         "paddw %6, %%mm6 \n\t"\
         "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
@@ -1992,10 +1992,10 @@
         "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
         "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
         "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
+        "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
         "paddw %%mm2, %%mm1 \n\t" /* a */\
         "paddw %%mm6, %%mm4 \n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+        "pmullw %7, %%mm1 \n\t" /* 20a */\
         "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
         "paddw %6, %%mm1 \n\t"\
         "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
@@ -2018,7 +2018,7 @@
         "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
         "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
         "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
+        "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\
         "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
         "paddw %%mm3, %%mm2 \n\t" /* d */\
         "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
@@ -2026,7 +2026,7 @@
         "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
         "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
         "paddw %%mm2, %%mm6 \n\t" /* a */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
+        "pmullw %7, %%mm6 \n\t" /* 20a */\
         "paddw %6, %%mm0 \n\t"\
         "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
         "psraw $5, %%mm0 \n\t"\
@@ -2041,8 +2041,8 @@
         "paddw %%mm2, %%mm5 \n\t" /* d */\
         "paddw %%mm6, %%mm6 \n\t" /* 2b */\
         "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
+        "pmullw %7, %%mm3 \n\t" /* 20a */\
+        "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\
         "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
         "paddw %6, %%mm4 \n\t"\
         "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
@@ -2055,7 +2055,9 @@
         : "+a"(src), "+c"(dst), "+m"(h)\
-        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+        : "d"((long)srcStride), "S"((long)dstStride),\
+          "m"(temp), "m"(ROUNDER),\
+          "m"(ff_pw_20), "m"(ff_pw_3)\
@@ -2133,10 +2135,10 @@
         "paddw %%mm5, %%mm5 \n\t" /* 2b */\
         "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
         "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
+        "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
         "paddw %%mm4, %%mm0 \n\t" /* a */\
         "paddw %%mm1, %%mm5 \n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "pmullw %7, %%mm0 \n\t" /* 20a */\
         "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
         "paddw %6, %%mm6 \n\t"\
         "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
@@ -2154,8 +2156,8 @@
         "paddw %%mm5, %%mm4 \n\t" /* d */\
         "paddw %%mm2, %%mm2 \n\t" /* 2b */\
         "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
+        "pmullw %7, %%mm1 \n\t" /* 20a */\
+        "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
         "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
         "paddw %6, %%mm1 \n\t"\
         "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
@@ -2168,7 +2170,9 @@
         : "+a"(src), "+c"(dst), "+m"(h)\
-        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+        : "S"((long)srcStride), "D"((long)dstStride),\
+          "m"(temp), "m"(ROUNDER),\
+          "m"(ff_pw_20), "m"(ff_pw_3)\
@@ -2247,31 +2251,31 @@
         "movq 8(%0), %%mm1 \n\t"\
         "movq 16(%0), %%mm2 \n\t"\
         "movq 24(%0), %%mm3 \n\t"\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
@@ -2279,7 +2283,9 @@
         : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
+        : "r"((long)dstStride), "r"(2*(long)dstStride),\
+          "m"(ROUNDER), "g"(4-14*(long)dstStride),\
+          "m"(ff_pw_20), "m"(ff_pw_3)\
@@ -2319,19 +2325,19 @@
         "movq 8(%0), %%mm1 \n\t"\
         "movq 16(%0), %%mm2 \n\t"\
         "movq 24(%0), %%mm3 \n\t"\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
-        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
-        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
@@ -2339,7 +2345,9 @@
         : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
-        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
+        : "r"((long)dstStride), "r"(2*(long)dstStride),\
+          "m"(ROUNDER), "g"(4-6*(long)dstStride),\
+          "m"(ff_pw_20), "m"(ff_pw_3)\
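
The QPEL hunks above use a second trick as well: the constant's operand is
threaded through the macro arguments (pw_20/pw_3, or the fixed positions %7
and %8), so one asm template serves every caller. A hypothetical sketch of
that stringify-the-operand technique (the names here are illustrative, not
taken from the patch):

#include <stdint.h>

/* The operand token is pasted into the template, so the caller decides
 * which asm operand holds the constant. */
#define PMULLW_BY(konst, reg) \
    "pmullw " #konst ", " #reg " \n\t"

static const uint64_t pw_3 __attribute__((aligned(8))) = 0x0003000300030003ULL;

static void scale4_by_3(uint64_t *four_words)
{
    __asm__ volatile(
        "movq %0, %%mm0 \n\t"   /* load four 16-bit words          */
        PMULLW_BY(%1, %%mm0)    /* expands to "pmullw %1, %%mm0"   */
        "movq %%mm0, %0 \n\t"
        "emms           \n\t"
        : "+m"(*four_words)
        : "m"(pw_3));
}
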
Index: libavcodec/i386/simple_idct_mmx.c
===================================================================
--- libavcodec/i386/simple_idct_mmx.c    (revision 11270)
+++ libavcodec/i386/simple_idct_mmx.c    (working copy)
         "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
         "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
         "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
-        "movq "MANGLE(wm1010)", %%mm4 \n\t"\
+        "movq %3, %%mm4 \n\t"\
         "pand %%mm0, %%mm4 \n\t"\
         "por %%mm1, %%mm4 \n\t"\
         "por %%mm2, %%mm4 \n\t"\
         "pslld $16, %%mm0 \n\t"\
-        "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
+        "#paddd %4, %%mm0 \n\t"\
         "psrad $13, %%mm0 \n\t"\
         "packssdw %%mm0, %%mm0 \n\t"\
         "movq %%mm0, " #dst " \n\t"\
         "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
         "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
         "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
-        "movq "MANGLE(wm1010)", %%mm4 \n\t"\
+        "movq %3, %%mm4 \n\t"\
         "pand %%mm0, %%mm4 \n\t"\
         "por %%mm1, %%mm4 \n\t"\
         "por %%mm2, %%mm4 \n\t"\
         "pslld $16, %%mm0 \n\t"\
-        "paddd "MANGLE(d40000)", %%mm0 \n\t"\
+        "paddd %4, %%mm0 \n\t"\
         "psrad $13, %%mm0 \n\t"\
         "packssdw %%mm0, %%mm0 \n\t"\
         "movq %%mm0, " #dst " \n\t"\
@@ -1270,7 +1270,7 @@
-        :: "r" (block), "r" (temp), "r" (coeffs)
+        :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
Index: libavcodec/i386/cavsdsp_mmx.c
===================================================================
--- libavcodec/i386/cavsdsp_mmx.c    (revision 11727)
+++ libavcodec/i386/cavsdsp_mmx.c    (working copy)
-#include "dsputil_mmx.h"
+#define SUMSUB_BA( a, b ) \
+"paddw "#b", "#a" \n\t" /* a = a+b */\
+"paddw "#b", "#b" \n\t" /* b = 2b  */\
+"psubw "#a", "#b" \n\t" /* b = b-a */
+
+#define SBUTTERFLY(a,b,t,n,m)\
+"mov" #m " " #a ", " #t " \n\t" /* abcd */\
+"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
+"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */
+
+#define TRANSPOSE4(a,b,c,d,t)\
+SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
+SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
+SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
+SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
+
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL;
+
 /*****************************************************************************
Index: libavcodec/i386/flacdsp_mmx.c
===================================================================
--- libavcodec/i386/flacdsp_mmx.c    (revision 11270)
+++ libavcodec/i386/flacdsp_mmx.c    (working copy)
     double c = 2.0 / (len-1.0);
     long i = -n2*sizeof(int32_t);
-    long j = n2*sizeof(int32_t);
         "movsd %0, %%xmm7 \n\t"
         "movapd %1, %%xmm6 \n\t"
         "movapd %%xmm6, %%xmm0 \n\t"\
         "subpd %%xmm1, %%xmm0 \n\t"\
         "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
-        "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
-        "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
+        "cvtpi2pd (%3,%0), %%xmm2 \n\t"\
         "mulpd %%xmm0, %%xmm2 \n\t"\
+        "movapd %%xmm2, (%1,%0,2) \n\t"\
+        "cvtpi2pd (%4,%0), %%xmm3 \n\t"\
         "mulpd %%xmm1, %%xmm3 \n\t"\
-        "movapd %%xmm2, (%2,%0,2) \n\t"\
-        MOVPD" %%xmm3, (%3,%1,2) \n\t"\
+        MOVPD" %%xmm3, (%2,%0,2) \n\t"\
         "subpd %%xmm5, %%xmm7 \n\t"\
-        :"+&r"(i), "+&r"(j)\
+        :"+&r"(i)\
         :"r"(w_data+n2), "r"(w_data+len-2-n2),\
          "r"(data+n2), "r"(data+len-2-n2)\
     long i = -len*sizeof(double);
-        "movsd %6, %%xmm0 \n\t"
-        "movsd %6, %%xmm1 \n\t"
-        "movsd %6, %%xmm2 \n\t"
+        "movsd %0, %%xmm0 \n\t"
+        "movsd %0, %%xmm1 \n\t"
+        "movsd %0, %%xmm2 \n\t"
+        :: "m"(*ff_pd_1) );
         "movapd (%4,%0), %%xmm3 \n\t"
         "movupd -8(%5,%0), %%xmm4 \n\t"
@@ -110,12 +112,14 @@
         "movsd %%xmm1, %2 \n\t"
         "movsd %%xmm2, %3 \n\t"
         :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
-        :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
+        :"r"(data1+len), "r"(data1+len-j)
-        "movsd %5, %%xmm0 \n\t"
-        "movsd %5, %%xmm1 \n\t"
+        "movsd %0, %%xmm0 \n\t"
+        "movsd %0, %%xmm1 \n\t"
+        :: "m"(*ff_pd_1) );
         "movapd (%3,%0), %%xmm3 \n\t"
         "movupd -8(%4,%0), %%xmm4 \n\t"
         "movsd %%xmm0, %1 \n\t"
         "movsd %%xmm1, %2 \n\t"
         :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
-        :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
+        :"r"(data1+len), "r"(data1+len-j)
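
The flacdsp hunks take a different route: rather than growing an already
crowded operand list, the ff_pd_1 load is hoisted into a separate asm
statement, and the block that follows assumes %%xmm0..%%xmm2 survive the
gap between the two statements. That only holds when the compiler emits no
intervening code, so treat the following as a sketch of the idiom under
that assumption (constant and function names are illustrative):

static const double pd_one[2] __attribute__((aligned(16))) = { 1.0, 1.0 };

static double xmm_carry_example(void)
{
    double out;
    /* Load the constant in its own asm statement... */
    __asm__ volatile("movsd %0, %%xmm0 \n\t" :: "m"(pd_one[0]));
    /* ...and rely on %%xmm0 still holding it here; nothing tells the
     * compiler that xmm0 is live across the boundary, so this pattern is
     * only safe when no code can be scheduled in between. */
    __asm__ volatile(
        "addsd %%xmm0, %%xmm0 \n\t"  /* 1.0 + 1.0 */
        "movsd %%xmm0, %0 \n\t"
        : "=m"(out));
    return out;
}
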