1 Index: libavcodec/i386/motion_est_mmx.c
2 ===================================================================
3 --- libavcodec/i386/motion_est_mmx.c (revision 11270)
4 +++ libavcodec/i386/motion_est_mmx.c (working copy)
6 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
9 - "movq "MANGLE(bone)", %%mm5 \n\t"
10 + "movq %4, %%mm5 \n\t"
11 "movq (%1), %%mm0 \n\t"
12 "pavgb 1(%1), %%mm0 \n\t"
17 : "+r" (h), "+r" (blk1), "+r" (blk2)
18 - : "r" ((long)stride)
19 + : "r" ((long)stride), "m" (bone)
24 "punpckhbw %%mm7, %%mm5 \n\t"
25 "paddw %%mm4, %%mm2 \n\t"
26 "paddw %%mm5, %%mm3 \n\t"
27 - "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
28 + "movq 16+%5, %%mm5 \n\t"
29 "paddw %%mm2, %%mm0 \n\t"
30 "paddw %%mm3, %%mm1 \n\t"
31 "paddw %%mm5, %%mm0 \n\t"
33 "add %4, %%"REG_a" \n\t"
36 - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
37 + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
41 Index: libavcodec/i386/dsputil_mmx.c
42 ===================================================================
43 --- libavcodec/i386/dsputil_mmx.c (revision 11879)
44 +++ libavcodec/i386/dsputil_mmx.c (working copy)
47 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
48 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
49 - "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
50 + "movq "#pw_20", %%mm4 \n\t" /* 20 */\
51 "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
52 "movq "#in7", " #m3 " \n\t" /* d */\
53 "movq "#in0", %%mm5 \n\t" /* D */\
55 "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
56 "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
57 "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
58 - "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
59 + "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\
60 "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
61 "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
62 "psraw $5, %%mm5 \n\t"\
63 @@ -1973,10 +1973,10 @@
64 "paddw %%mm5, %%mm5 \n\t" /* 2b */\
65 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
66 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
67 - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
68 + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
69 "paddw %%mm4, %%mm0 \n\t" /* a */\
70 "paddw %%mm1, %%mm5 \n\t" /* d */\
71 - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
72 + "pmullw %7, %%mm0 \n\t" /* 20a */\
73 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
74 "paddw %6, %%mm6 \n\t"\
75 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
76 @@ -1999,10 +1999,10 @@
77 "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
78 "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
79 "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
80 - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
81 + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
82 "paddw %%mm2, %%mm1 \n\t" /* a */\
83 "paddw %%mm6, %%mm4 \n\t" /* d */\
84 - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
85 + "pmullw %7, %%mm1 \n\t" /* 20a */\
86 "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
87 "paddw %6, %%mm1 \n\t"\
88 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
90 "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
91 "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
92 "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
93 - "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
94 + "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\
95 "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
96 "paddw %%mm3, %%mm2 \n\t" /* d */\
97 "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
99 "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
100 "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
101 "paddw %%mm2, %%mm6 \n\t" /* a */\
102 - "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
103 + "pmullw %7, %%mm6 \n\t" /* 20a */\
104 "paddw %6, %%mm0 \n\t"\
105 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
106 "psraw $5, %%mm0 \n\t"\
107 @@ -2048,8 +2048,8 @@
108 "paddw %%mm2, %%mm5 \n\t" /* d */\
109 "paddw %%mm6, %%mm6 \n\t" /* 2b */\
110 "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
111 - "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
112 - "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
113 + "pmullw %7, %%mm3 \n\t" /* 20a */\
114 + "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\
115 "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
116 "paddw %6, %%mm4 \n\t"\
117 "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
118 @@ -2062,7 +2062,9 @@
121 : "+a"(src), "+c"(dst), "+g"(h)\
122 - : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
123 + : "d"((long)srcStride), "S"((long)dstStride),\
124 + "m"(temp), "m"(ROUNDER),\
125 + "m"(ff_pw_20), "m"(ff_pw_3)\
129 @@ -2140,10 +2142,10 @@
130 "paddw %%mm5, %%mm5 \n\t" /* 2b */\
131 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
132 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
133 - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
134 + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
135 "paddw %%mm4, %%mm0 \n\t" /* a */\
136 "paddw %%mm1, %%mm5 \n\t" /* d */\
137 - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
138 + "pmullw %7, %%mm0 \n\t" /* 20a */\
139 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
140 "paddw %6, %%mm6 \n\t"\
141 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
142 @@ -2161,8 +2163,8 @@
143 "paddw %%mm5, %%mm4 \n\t" /* d */\
144 "paddw %%mm2, %%mm2 \n\t" /* 2b */\
145 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
146 - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
147 - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
148 + "pmullw %7, %%mm1 \n\t" /* 20a */\
149 + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
150 "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
151 "paddw %6, %%mm1 \n\t"\
152 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
153 @@ -2175,7 +2177,9 @@
156 : "+a"(src), "+c"(dst), "+g"(h)\
157 - : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
158 + : "S"((long)srcStride), "D"((long)dstStride),\
159 + "m"(temp), "m"(ROUNDER),\
160 + "m"(ff_pw_20), "m"(ff_pw_3)\
164 @@ -2254,31 +2258,31 @@
165 "movq 8(%0), %%mm1 \n\t"\
166 "movq 16(%0), %%mm2 \n\t"\
167 "movq 24(%0), %%mm3 \n\t"\
168 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
169 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
170 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
171 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
173 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
174 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
176 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
177 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
179 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
180 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
181 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
182 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
184 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
185 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
186 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
187 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
189 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
190 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
191 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
192 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
194 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
195 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
196 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
197 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
199 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
200 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
202 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
203 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
205 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
206 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
207 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
208 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
212 @@ -2286,7 +2290,9 @@
215 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
216 - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
217 + : "r"((long)dstStride), "r"(2*(long)dstStride),\
218 + "m"(ROUNDER), "g"(4-14*(long)dstStride),\
219 + "m"(ff_pw_20), "m"(ff_pw_3)\
223 @@ -2326,19 +2332,19 @@
224 "movq 8(%0), %%mm1 \n\t"\
225 "movq 16(%0), %%mm2 \n\t"\
226 "movq 24(%0), %%mm3 \n\t"\
227 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
228 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
229 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
230 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
232 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
233 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
235 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
236 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
238 - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
239 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
241 - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
242 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
244 - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
245 - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
246 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
247 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
251 @@ -2346,7 +2352,9 @@
254 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
255 - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
256 + : "r"((long)dstStride), "r"(2*(long)dstStride),\
257 + "m"(ROUNDER), "g"(4-6*(long)dstStride),\
258 + "m"(ff_pw_20), "m"(ff_pw_3)\
262 Index: libavcodec/i386/simple_idct_mmx.c
263 ===================================================================
264 --- libavcodec/i386/simple_idct_mmx.c (revision 11270)
265 +++ libavcodec/i386/simple_idct_mmx.c (working copy)
267 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
268 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
269 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
270 - "movq "MANGLE(wm1010)", %%mm4 \n\t"\
271 + "movq %3, %%mm4 \n\t"\
272 "pand %%mm0, %%mm4 \n\t"\
273 "por %%mm1, %%mm4 \n\t"\
274 "por %%mm2, %%mm4 \n\t"\
278 "pslld $16, %%mm0 \n\t"\
279 - "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
280 + "#paddd %4, %%mm0 \n\t"\
281 "psrad $13, %%mm0 \n\t"\
282 "packssdw %%mm0, %%mm0 \n\t"\
283 "movq %%mm0, " #dst " \n\t"\
285 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
286 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
287 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
288 - "movq "MANGLE(wm1010)", %%mm4 \n\t"\
289 + "movq %3, %%mm4 \n\t"\
290 "pand %%mm0, %%mm4 \n\t"\
291 "por %%mm1, %%mm4 \n\t"\
292 "por %%mm2, %%mm4 \n\t"\
296 "pslld $16, %%mm0 \n\t"\
297 - "paddd "MANGLE(d40000)", %%mm0 \n\t"\
298 + "paddd %4, %%mm0 \n\t"\
299 "psrad $13, %%mm0 \n\t"\
300 "packssdw %%mm0, %%mm0 \n\t"\
301 "movq %%mm0, " #dst " \n\t"\
302 @@ -1270,7 +1270,7 @@
306 - :: "r" (block), "r" (temp), "r" (coeffs)
307 + :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
311 Index: libavcodec/i386/cavsdsp_mmx.c
312 ===================================================================
313 --- libavcodec/i386/cavsdsp_mmx.c (revision 11727)
314 +++ libavcodec/i386/cavsdsp_mmx.c (working copy)
319 -#include "dsputil_mmx.h"
322 +#define SUMSUB_BA( a, b ) /* sum/difference butterfly on packed words: afterwards a = a+b, b = b-a (in terms of the input values) */ \
323 +"paddw "#b", "#a" \n\t" /* a = a + b */\
324 +"paddw "#b", "#b" \n\t" /* b = 2b */\
325 +"psubw "#a", "#b" \n\t" /* b = 2b - (a + b) = b - a */
327 +#define SBUTTERFLY(a,b,t,n,m) /* interleave a with b using punpckl/punpckh with suffix n; m is the mov suffix. Low-half result goes to a, high-half result to t; b is only read */\
328 +"mov" #m " " #a ", " #t " \n\t" /* t = copy of a (abcd), taken before a is overwritten */\
329 +"punpckl" #n " " #b ", " #a " \n\t" /* a = aebf: low halves of a and b interleaved */\
330 +"punpckh" #n " " #b ", " #t " \n\t" /* t = cgdh: high halves of a and b interleaved */\
332 +#define TRANSPOSE4(a,b,c,d,t) /* 4x4 transpose built from four SBUTTERFLY steps; for inputs a=abcd b=efgh c=ijkl d=mnop the transposed rows end up in a, d, t, c (in that order) and b is overwritten */\
333 +SBUTTERFLY(a,b,t,wd,q) /* word interleave of rows a,b: a=aebf t=cgdh */\
334 +SBUTTERFLY(c,d,b,wd,q) /* word interleave of rows c,d (b reused as scratch): c=imjn b=kolp */\
335 +SBUTTERFLY(a,c,d,dq,q) /* dword interleave of the low parts: a=aeim d=bfjn */\
336 +SBUTTERFLY(t,b,c,dq,q) /* dword interleave of the high parts: t=cgko c=dhlp */
338 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL; /* 4 in each of the four 16-bit words */
339 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL; /* 5 in each of the four 16-bit words */
340 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL; /* 7 in each of the four 16-bit words */
341 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL; /* 42 (0x2A) in each of the four 16-bit words */
342 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL; /* 64 (0x40) in each of the four 16-bit words */
343 +DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL; /* 96 (0x60) in each of the four 16-bit words */
345 /*****************************************************************************