]> git.sesse.net Git - ffmpeg/blob - libavcodec/x86/qpeldsp_init.c
8f296a11a16b5e782512fa66384437b9a3119b67
[ffmpeg] / libavcodec / x86 / qpeldsp_init.c
1 /*
2  * quarterpel DSP functions
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include <stddef.h>
22 #include <stdint.h>
23
24 #include "config.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/pixels.h"
29 #include "libavcodec/qpeldsp.h"
30 #include "fpel.h"
31
/*
 * Two-source ("l2") pixel routines for 8- and 16-pixel-wide blocks, in
 * put, avg and put_no_rnd flavours.  Only the prototypes appear here; the
 * definitions are external to this file (presumably x86 assembly -- confirm).
 *
 * Each routine takes a stride for dst and for src1 but none for src2.
 * Every call made from QPEL_OP passes as src2 an on-stack scratch buffer
 * that was filled with a stride of 8 (8-wide) or 16 (16-wide).
 *
 * NOTE(review): presumably these combine src1 and src2 per pixel (an
 * average, with or without rounding) over h rows -- that is not visible
 * from the prototypes, confirm against the implementation.
 */
32 void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
33                               int dstStride, int src1Stride, int h);
34 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
35                                      uint8_t *src2, int dstStride,
36                                      int src1Stride, int h);
37 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
38                               int dstStride, int src1Stride, int h);
39 void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
40                                int dstStride, int src1Stride, int h);
41 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
42                                int dstStride, int src1Stride, int h);
43 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
44                                       int dstStride, int src1Stride, int h);
/*
 * Horizontal low-pass routines of the mpeg4_qpel family, 16- and 8-wide,
 * in put / avg / put_no_rnd flavours.  Only the prototypes appear here;
 * the filter itself is defined outside this file.
 *
 * QPEL_OP passes h as 8 or 9 for the 8-wide routines and as 16 or 17 for
 * the 16-wide ones (presumably the number of rows to produce -- confirm).
 */
45 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
46                                           int dstStride, int srcStride, int h);
47 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
48                                           int dstStride, int srcStride, int h);
49 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
50                                                  int dstStride, int srcStride,
51                                                  int h);
52 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
53                                          int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
55                                          int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
57                                                 int dstStride, int srcStride,
58                                                 int h);
/*
 * Vertical low-pass counterparts of the routines above, 16- and 8-wide,
 * in put / avg / put_no_rnd flavours.  Unlike the horizontal routines they
 * take no h argument; the block height is therefore not caller-selectable
 * (presumably fixed by the 8/16 in the name -- confirm).
 * Definitions are external to this file.
 */
59 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
60                                           int dstStride, int srcStride);
61 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
62                                           int dstStride, int srcStride);
63 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
64                                                  int dstStride, int srcStride);
65 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
66                                          int dstStride, int srcStride);
67 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
68                                          int dstStride, int srcStride);
69 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
70                                                 int dstStride, int srcStride);
/*
 * Name aliases.  The mc00 functions generated by QPEL_OP call
 * ff_ ## OPNAME ## pixels{8,16}_ ## MMX.  These two defines make the names
 * that result for OPNAME = put_no_rnd_ and MMX = mmxext expand to the
 * ff_put_pixels{16,8}_mmx functions.
 * NOTE(review): ff_put_pixels{16,8}_mmx are not declared in this file;
 * presumably they come from fpel.h -- confirm.
 */
71 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
72 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
73
/* The definitions below are compiled only when HAVE_YASM is non-zero. */
74 #if HAVE_YASM
75
/*
 * Same aliasing for OPNAME = put_: the mc00 functions produced by
 * QPEL_OP(put_, _, mmxext) resolve to ff_put_pixels{16,8}_mmx.
 */
76 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
77 #define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
78
79 #define QPEL_OP(OPNAME, RND, MMX)                                       \
80 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src,    \
81                                          ptrdiff_t stride)              \
82 {                                                                       \
83     ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
84 }                                                                       \
85                                                                         \
86 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src,    \
87                                          ptrdiff_t stride)              \
88 {                                                                       \
89     uint64_t temp[8];                                                   \
90     uint8_t *const half = (uint8_t *) temp;                             \
91     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
92                                                    stride, 8);          \
93     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
94                                         stride, stride, 8);             \
95 }                                                                       \
96                                                                         \
97 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src,    \
98                                          ptrdiff_t stride)              \
99 {                                                                       \
100     ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
101                                                    stride, 8);          \
102 }                                                                       \
103                                                                         \
104 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src,    \
105                                          ptrdiff_t stride)              \
106 {                                                                       \
107     uint64_t temp[8];                                                   \
108     uint8_t *const half = (uint8_t *) temp;                             \
109     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
110                                                    stride, 8);          \
111     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
112                                         stride, 8);                     \
113 }                                                                       \
114                                                                         \
115 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src,    \
116                                          ptrdiff_t stride)              \
117 {                                                                       \
118     uint64_t temp[8];                                                   \
119     uint8_t *const half = (uint8_t *) temp;                             \
120     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
121                                                    8, stride);          \
122     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
123                                         stride, stride, 8);             \
124 }                                                                       \
125                                                                         \
126 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src,    \
127                                          ptrdiff_t stride)              \
128 {                                                                       \
129     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
130                                                    stride, stride);     \
131 }                                                                       \
132                                                                         \
133 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src,    \
134                                          ptrdiff_t stride)              \
135 {                                                                       \
136     uint64_t temp[8];                                                   \
137     uint8_t *const half = (uint8_t *) temp;                             \
138     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
139                                                    8, stride);          \
140     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
141                                         stride, 8);                     \
142 }                                                                       \
143                                                                         \
144 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src,    \
145                                          ptrdiff_t stride)              \
146 {                                                                       \
147     uint64_t half[8 + 9];                                               \
148     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
149     uint8_t *const halfHV = (uint8_t *) half;                           \
150     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
151                                                    stride, 9);          \
152     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
153                                         stride, 9);                     \
154     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
155     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
156                                         stride, 8, 8);                  \
157 }                                                                       \
158                                                                         \
159 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src,    \
160                                          ptrdiff_t stride)              \
161 {                                                                       \
162     uint64_t half[8 + 9];                                               \
163     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
164     uint8_t *const halfHV = (uint8_t *) half;                           \
165     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
166                                                    stride, 9);          \
167     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
168                                         stride, 9);                     \
169     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
170     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
171                                         stride, 8, 8);                  \
172 }                                                                       \
173                                                                         \
174 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src,    \
175                                          ptrdiff_t stride)              \
176 {                                                                       \
177     uint64_t half[8 + 9];                                               \
178     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
179     uint8_t *const halfHV = (uint8_t *) half;                           \
180     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
181                                                    stride, 9);          \
182     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
183                                         stride, 9);                     \
184     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
185     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
186                                         stride, 8, 8);                  \
187 }                                                                       \
188                                                                         \
189 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src,    \
190                                          ptrdiff_t stride)              \
191 {                                                                       \
192     uint64_t half[8 + 9];                                               \
193     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
194     uint8_t *const halfHV = (uint8_t *) half;                           \
195     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
196                                                    stride, 9);          \
197     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
198                                         stride, 9);                     \
199     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
200     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
201                                         stride, 8, 8);                  \
202 }                                                                       \
203                                                                         \
204 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src,    \
205                                          ptrdiff_t stride)              \
206 {                                                                       \
207     uint64_t half[8 + 9];                                               \
208     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
209     uint8_t *const halfHV = (uint8_t *) half;                           \
210     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
211                                                    stride, 9);          \
212     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
213     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
214                                         stride, 8, 8);                  \
215 }                                                                       \
216                                                                         \
217 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src,    \
218                                          ptrdiff_t stride)              \
219 {                                                                       \
220     uint64_t half[8 + 9];                                               \
221     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
222     uint8_t *const halfHV = (uint8_t *) half;                           \
223     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
224                                                    stride, 9);          \
225     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
226     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
227                                         stride, 8, 8);                  \
228 }                                                                       \
229                                                                         \
230 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src,    \
231                                          ptrdiff_t stride)              \
232 {                                                                       \
233     uint64_t half[8 + 9];                                               \
234     uint8_t *const halfH = (uint8_t *) half;                            \
235     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
236                                                    stride, 9);          \
237     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
238                                         8, stride, 9);                  \
239     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
240                                                    stride, 8);          \
241 }                                                                       \
242                                                                         \
243 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src,    \
244                                          ptrdiff_t stride)              \
245 {                                                                       \
246     uint64_t half[8 + 9];                                               \
247     uint8_t *const halfH = (uint8_t *) half;                            \
248     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
249                                                    stride, 9);          \
250     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
251                                         stride, 9);                     \
252     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
253                                                    stride, 8);          \
254 }                                                                       \
255                                                                         \
256 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src,    \
257                                          ptrdiff_t stride)              \
258 {                                                                       \
259     uint64_t half[9];                                                   \
260     uint8_t *const halfH = (uint8_t *) half;                            \
261     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
262                                                    stride, 9);          \
263     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
264                                                    stride, 8);          \
265 }                                                                       \
266                                                                         \
267 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src,   \
268                                           ptrdiff_t stride)             \
269 {                                                                       \
270     ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
271 }                                                                       \
272                                                                         \
273 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src,   \
274                                           ptrdiff_t stride)             \
275 {                                                                       \
276     uint64_t temp[32];                                                  \
277     uint8_t *const half = (uint8_t *) temp;                             \
278     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
279                                                     stride, 16);        \
280     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
281                                          stride, 16);                   \
282 }                                                                       \
283                                                                         \
284 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src,   \
285                                           ptrdiff_t stride)             \
286 {                                                                       \
287     ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
288                                                     stride, stride, 16);\
289 }                                                                       \
290                                                                         \
291 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src,   \
292                                           ptrdiff_t stride)             \
293 {                                                                       \
294     uint64_t temp[32];                                                  \
295     uint8_t *const half = (uint8_t*) temp;                              \
296     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
297                                                     stride, 16);        \
298     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
299                                          stride, stride, 16);           \
300 }                                                                       \
301                                                                         \
302 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src,   \
303                                           ptrdiff_t stride)             \
304 {                                                                       \
305     uint64_t temp[32];                                                  \
306     uint8_t *const half = (uint8_t *) temp;                             \
307     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
308                                                     stride);            \
309     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
310                                          stride, 16);                   \
311 }                                                                       \
312                                                                         \
313 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src,   \
314                                           ptrdiff_t stride)             \
315 {                                                                       \
316     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
317                                                     stride, stride);    \
318 }                                                                       \
319                                                                         \
320 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src,   \
321                                           ptrdiff_t stride)             \
322 {                                                                       \
323     uint64_t temp[32];                                                  \
324     uint8_t *const half = (uint8_t *) temp;                             \
325     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
326                                                     stride);            \
327     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
328                                          stride, stride, 16);           \
329 }                                                                       \
330                                                                         \
331 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src,   \
332                                           ptrdiff_t stride)             \
333 {                                                                       \
334     uint64_t half[16 * 2 + 17 * 2];                                     \
335     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
336     uint8_t *const halfHV = (uint8_t *) half;                           \
337     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
338                                                     stride, 17);        \
339     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
340                                          stride, 17);                   \
341     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
342                                                     16, 16);            \
343     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
344                                          stride, 16, 16);               \
345 }                                                                       \
346                                                                         \
347 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src,   \
348                                           ptrdiff_t stride)             \
349 {                                                                       \
350     uint64_t half[16 * 2 + 17 * 2];                                     \
351     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
352     uint8_t *const halfHV = (uint8_t *) half;                           \
353     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
354                                                     stride, 17);        \
355     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
356                                          stride, 17);                   \
357     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
358                                                     16, 16);            \
359     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
360                                          stride, 16, 16);               \
361 }                                                                       \
362                                                                         \
363 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src,   \
364                                           ptrdiff_t stride)             \
365 {                                                                       \
366     uint64_t half[16 * 2 + 17 * 2];                                     \
367     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
368     uint8_t *const halfHV = (uint8_t *) half;                           \
369     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
370                                                     stride, 17);        \
371     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
372                                          stride, 17);                   \
373     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
374                                                     16, 16);            \
375     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
376                                          stride, 16, 16);               \
377 }                                                                       \
378                                                                         \
379 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src,   \
380                                           ptrdiff_t stride)             \
381 {                                                                       \
382     uint64_t half[16 * 2 + 17 * 2];                                     \
383     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
384     uint8_t *const halfHV = (uint8_t *) half;                           \
385     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
386                                                     stride, 17);        \
387     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
388                                          stride, 17);                   \
389     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
390                                                     16, 16);            \
391     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
392                                          stride, 16, 16);               \
393 }                                                                       \
394                                                                         \
395 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src,   \
396                                           ptrdiff_t stride)             \
397 {                                                                       \
398     uint64_t half[16 * 2 + 17 * 2];                                     \
399     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
400     uint8_t *const halfHV = (uint8_t *) half;                           \
401     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
402                                                     stride, 17);        \
403     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
404                                                     16, 16);            \
405     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
406                                          stride, 16, 16);               \
407 }                                                                       \
408                                                                         \
409 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src,   \
410                                           ptrdiff_t stride)             \
411 {                                                                       \
412     uint64_t half[16 * 2 + 17 * 2];                                     \
413     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
414     uint8_t *const halfHV = (uint8_t *) half;                           \
415     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
416                                                     stride, 17);        \
417     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
418                                                     16, 16);            \
419     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
420                                          stride, 16, 16);               \
421 }                                                                       \
422                                                                         \
/* qpel16 mc12: h_lowpass fills halfH (17 * 2 uint64_t = 17 rows of 16      \
 * bytes); pixels16_l2 then rewrites halfH in place from src and halfH;     \
 * finally the OPNAME v_lowpass reads halfH and writes dst.                 \
 * NOTE(review): lowpass argument roles are inferred from the names and     \
 * call sites; their prototypes are outside this excerpt - confirm. */      \
423 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src,   \
424                                           ptrdiff_t stride)             \
425 {                                                                       \
426     uint64_t half[17 * 2];                                              \
427     uint8_t *const halfH = (uint8_t *) half;                            \
428     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
429                                                     stride, 17);        \
430     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
431                                          stride, 17);                   \
432     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
433                                                     stride, 16);        \
434 }                                                                       \
435                                                                         \
/* qpel16 mc32: h_lowpass fills halfH (17 * 2 uint64_t = 17 rows of 16      \
 * bytes); pixels16_l2 then rewrites halfH in place from src + 1 and        \
 * halfH; finally the OPNAME v_lowpass reads halfH and writes dst.          \
 * NOTE(review): lowpass argument roles are inferred from the names and     \
 * call sites; their prototypes are outside this excerpt - confirm. */      \
436 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src,   \
437                                           ptrdiff_t stride)             \
438 {                                                                       \
439     uint64_t half[17 * 2];                                              \
440     uint8_t *const halfH = (uint8_t *) half;                            \
441     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
442                                                     stride, 17);        \
443     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
444                                          stride, 17);                   \
445     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
446                                                     stride, 16);        \
447 }                                                                       \
448                                                                         \
/* qpel16 mc22: h_lowpass fills the 17 * 16 byte scratch halfH, then the    \
 * OPNAME v_lowpass is run on halfH with dst as its first argument.         \
 * No pixels*_l2 step is involved in this case.                             \
 * NOTE(review): lowpass prototypes are outside this excerpt - confirm      \
 * the (dst, src, dstStride, srcStride[, rows]) reading. */                 \
449 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \
450                                           ptrdiff_t stride)             \
451 {                                                                       \
452     uint64_t half[17 * 2];                                              \
453     uint8_t *const halfH = (uint8_t *) half;                            \
454     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
455                                                     stride, 17);        \
456     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
457                                                     stride, 16);        \
458 }
459
/* Instantiate the qpel wrapper functions three times with the mmxext suffix.
 * OPNAME (first argument) is pasted into each generated function name and
 * into the final call of each wrapper. RND (second argument) is pasted into
 * the names of the intermediate "ff_put ## RND ## ..." calls, so the put_
 * and avg_ instantiations both use the ff_put_* intermediates, while
 * put_no_rnd_ uses the ff_put_no_rnd_* ones. */
460 QPEL_OP(put_,        _,        mmxext)
461 QPEL_OP(avg_,        _,        mmxext)
462 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
463
464 #endif /* HAVE_YASM */
465
/* Fill the 16 entries of one row of a PFX ## _pixels_tab table reached
 * through a pointer named c, which must be in scope where the macro is used.
 *
 * Entry n of row IDX is set to the function whose name is formed by pasting
 * PREFIX, PFX, SIZE, "_mcXY_" and CPU, where n = X + 4 * Y for X, Y in 0..3
 * (mc10 -> 1, mc01 -> 4, mc33 -> 15). PREFIX may be left empty.
 * Wrapped in do { } while (0) so it behaves as a single statement. */
466 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
467 do {                                                                         \
468     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
469     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
470     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
471     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
472     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
473     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
474     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
475     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
476     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
477     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
478     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
479     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
480     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
481     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
482     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
483     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
484 } while (0)
485
/**
 * Install the mmxext qpel functions into a QpelDSPContext.
 *
 * The CPU flags are queried with av_get_cpu_flags(). When X86_MMXEXT()
 * accepts them and the build defines HAVE_MMXEXT_EXTERNAL as non-zero, the
 * avg_qpel, put_qpel and put_no_rnd_qpel tables are filled: row 0 with the
 * qpel16_* functions and row 1 with the qpel8_* functions. In every other
 * case c is left untouched.
 *
 * @param c context whose *_pixels_tab entries are overwritten
 */
486 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
487 {
488     int cpu_flags = av_get_cpu_flags();
489
490     if (X86_MMXEXT(cpu_flags)) {
491 #if HAVE_MMXEXT_EXTERNAL
        /* The trailing empty argument is SET_QPEL_FUNCS' PREFIX. */
492         SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
493         SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
494
495         SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
496         SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
497         SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
498         SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
499 #endif /* HAVE_MMXEXT_EXTERNAL */
500     }
501 }