1 Index: libavcodec/i386/motion_est_mmx.c
2 ===================================================================
3 --- libavcodec/i386/motion_est_mmx.c (revision 11270)
4 +++ libavcodec/i386/motion_est_mmx.c (working copy)
6 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
9 - "movq "MANGLE(bone)", %%mm5 \n\t"
10 + "movq %4, %%mm5 \n\t"
11 "movq (%1), %%mm0 \n\t"
12 "pavgb 1(%1), %%mm0 \n\t"
17 : "+r" (h), "+r" (blk1), "+r" (blk2)
18 - : "r" ((x86_reg)stride)
19 + : "r" ((x86_reg)stride), "m" (bone)
24 "punpckhbw %%mm7, %%mm5 \n\t"
25 "paddw %%mm4, %%mm2 \n\t"
26 "paddw %%mm5, %%mm3 \n\t"
27 - "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
28 + "movq 16+%5, %%mm5 \n\t"
29 "paddw %%mm2, %%mm0 \n\t"
30 "paddw %%mm3, %%mm1 \n\t"
31 "paddw %%mm5, %%mm0 \n\t"
33 "add %4, %%"REG_a" \n\t"
36 - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride)
37 + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride), "m" (round_tab[0])
41 Index: libavcodec/i386/simple_idct_mmx.c
42 ===================================================================
43 --- libavcodec/i386/simple_idct_mmx.c (revision 11270)
44 +++ libavcodec/i386/simple_idct_mmx.c (working copy)
46 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
47 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
48 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
49 - "movq "MANGLE(wm1010)", %%mm4 \n\t"\
50 + "movq %3, %%mm4 \n\t"\
51 "pand %%mm0, %%mm4 \n\t"\
52 "por %%mm1, %%mm4 \n\t"\
53 "por %%mm2, %%mm4 \n\t"\
57 "pslld $16, %%mm0 \n\t"\
58 - "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
59 + "#paddd %4, %%mm0 \n\t"\
60 "psrad $13, %%mm0 \n\t"\
61 "packssdw %%mm0, %%mm0 \n\t"\
62 "movq %%mm0, " #dst " \n\t"\
64 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
65 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
66 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
67 - "movq "MANGLE(wm1010)", %%mm4 \n\t"\
68 + "movq %3, %%mm4 \n\t"\
69 "pand %%mm0, %%mm4 \n\t"\
70 "por %%mm1, %%mm4 \n\t"\
71 "por %%mm2, %%mm4 \n\t"\
75 "pslld $16, %%mm0 \n\t"\
76 - "paddd "MANGLE(d40000)", %%mm0 \n\t"\
77 + "paddd %4, %%mm0 \n\t"\
78 "psrad $13, %%mm0 \n\t"\
79 "packssdw %%mm0, %%mm0 \n\t"\
80 "movq %%mm0, " #dst " \n\t"\
85 - :: "r" (block), "r" (temp), "r" (coeffs)
86 + :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
90 Index: libavcodec/i386/cavsdsp_mmx.c
91 ===================================================================
92 --- libavcodec/i386/cavsdsp_mmx.c (revision 13752)
93 +++ libavcodec/i386/cavsdsp_mmx.c (working copy)
95 #include "libavutil/common.h"
96 #include "libavutil/x86_cpu.h"
97 #include "libavcodec/dsputil.h"
98 -#include "dsputil_mmx.h"
100 +#define SUMSUB_BA( a, b ) /* in-place butterfly on packed 16-bit words (AT&T order, source operand first): afterwards a = a + b and b = b - a, using the values a and b had on entry */\
101 +"paddw "#b", "#a" \n\t" /* a = a + b */\
102 +"paddw "#b", "#b" \n\t" /* b = 2*b */\
103 +"psubw "#a", "#b" \n\t" /* b = 2*b - (a + b) = b - a */
105 +#define SBUTTERFLY(a,b,t,n,m)/* interleave a with b: the low-half interleave ends up in a, the high-half interleave in t; n is the punpckl/punpckh suffix, m the mov suffix. Lane letters below take a=abcd, b=efgh. NOTE(review): the last line ends in a continuation backslash, so the line following the macro presumably has to stay empty - confirm */\
106 +"mov" #m " " #a ", " #t " \n\t" /* t = copy of a: abcd */\
107 +"punpckl" #n " " #b ", " #a " \n\t" /* a = low halves of a and b interleaved: aebf */\
108 +"punpckh" #n " " #b ", " #t " \n\t" /* t = high halves of the old a and b interleaved: cgdh */\
110 +#define TRANSPOSE4(a,b,c,d,t)/* 4x4 transpose built from four SBUTTERFLY steps (two word interleaves, then two dword interleaves, all with movq copies). With input rows a=abcd, b=efgh, c=ijkl, d=mnop the transposed rows come out in a, d, t, c (in that order); b is overwritten with an intermediate value */\
111 +SBUTTERFLY(a,b,t,wd,q) /* rows a,b interleaved by word: a=aebf t=cgdh */\
112 +SBUTTERFLY(c,d,b,wd,q) /* rows c,d interleaved by word: c=imjn b=kolp */\
113 +SBUTTERFLY(a,c,d,dq,q) /* dword interleave gives output rows 0 and 1: a=aeim d=bfjn */\
114 +SBUTTERFLY(t,b,c,dq,q) /* dword interleave gives output rows 2 and 3: t=cgko c=dhlp */
116 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL; /* file-local 64-bit constants; each repeats one value in all four 16-bit words - here 4 */
117 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL; /* 5 in every 16-bit word */
118 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL; /* 7 in every 16-bit word */
119 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL; /* 0x2A = 42 in every 16-bit word */
120 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL; /* 0x40 = 64 in every 16-bit word */
121 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL; /* 0x60 = 96 in every 16-bit word */
123 /*****************************************************************************
126 Index: libavcodec/i386/flacdsp_mmx.c
127 ===================================================================
128 --- libavcodec/i386/flacdsp_mmx.c (revision 11888)
129 +++ libavcodec/i386/flacdsp_mmx.c (working copy)
131 double c = 2.0 / (len-1.0);
133 x86_reg i = -n2*sizeof(int32_t);
134 - x86_reg j = n2*sizeof(int32_t);
136 "movsd %0, %%xmm7 \n\t"
137 "movapd %1, %%xmm6 \n\t"
142 - :"+&r"(i), "+&r"(j)\
144 :"r"(w_data+n2), "r"(data+n2)\
148 "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
149 "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
150 "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
151 + :: "m"(*ff_pd_1) );
154 "movapd (%4,%0), %%xmm3 \n\t"
155 "movupd -8(%5,%0), %%xmm4 \n\t"
158 "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
159 "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
160 + :: "m"(*ff_pd_1) );
163 "movapd (%3,%0), %%xmm3 \n\t"
164 "movupd -8(%4,%0), %%xmm4 \n\t"