git.sesse.net Git - ffmpeg/blob - libavcodec/x86/qpeldsp_init.c
Merge commit '20f95f21f9b9595608ba668a6eca78f2d508be67'
[ffmpeg] / libavcodec / x86 / qpeldsp_init.c
1 /*
2  * quarterpel DSP functions
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
32 #include "fpel.h"
33
/*
 * External MMXEXT primitives referenced by the QPEL_OP wrappers.  Only
 * prototypes are given here; the bodies are expected to come from
 * separately built code (not visible in this file).
 */

/* Two-source ("l2") pixel routines.  dst and src1 each take a stride;
 * no stride argument is passed for src2. */
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
                              uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
                                     uint8_t *src1, uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
                              uint8_t *src1, uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
                               uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
                               uint8_t *src1, uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
                                      uint8_t *src1, uint8_t *src2,
                                      int dstStride, int src1Stride, int h);

/* h_lowpass filters: the row count h is an explicit argument. */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride,
                                          int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride,
                                          int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 uint8_t *src,
                                                 int dstStride,
                                                 int srcStride, int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride,
                                         int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride,
                                         int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                uint8_t *src,
                                                int dstStride,
                                                int srcStride, int h);

/* v_lowpass filters: these take no row-count argument. */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 uint8_t *src,
                                                 int dstStride,
                                                 int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                uint8_t *src,
                                                int dstStride,
                                                int srcStride);

/* The plain (non-l2) put_no_rnd names resolve to the MMX put routines. */
#define ff_put_no_rnd_pixels8_mmxext  ff_put_pixels8_mmx
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
75
76 #if HAVE_YASM
77
/* The QPEL_OP mc00 wrappers paste together ff_<op>pixels{8,16}_<MMX>;
 * map the resulting mmxext "put" names onto the MMX routines. */
#define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
80
81 #define QPEL_OP(OPNAME, RND, MMX)                                       \
82 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src,    \
83                                          ptrdiff_t stride)              \
84 {                                                                       \
85     ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
86 }                                                                       \
87                                                                         \
88 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src,    \
89                                          ptrdiff_t stride)              \
90 {                                                                       \
91     uint64_t temp[8];                                                   \
92     uint8_t *const half = (uint8_t *) temp;                             \
93     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
94                                                    stride, 8);          \
95     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
96                                         stride, stride, 8);             \
97 }                                                                       \
98                                                                         \
99 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src,    \
100                                          ptrdiff_t stride)              \
101 {                                                                       \
102     ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
103                                                    stride, 8);          \
104 }                                                                       \
105                                                                         \
106 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src,    \
107                                          ptrdiff_t stride)              \
108 {                                                                       \
109     uint64_t temp[8];                                                   \
110     uint8_t *const half = (uint8_t *) temp;                             \
111     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
112                                                    stride, 8);          \
113     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
114                                         stride, 8);                     \
115 }                                                                       \
116                                                                         \
117 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src,    \
118                                          ptrdiff_t stride)              \
119 {                                                                       \
120     uint64_t temp[8];                                                   \
121     uint8_t *const half = (uint8_t *) temp;                             \
122     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
123                                                    8, stride);          \
124     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
125                                         stride, stride, 8);             \
126 }                                                                       \
127                                                                         \
128 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src,    \
129                                          ptrdiff_t stride)              \
130 {                                                                       \
131     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
132                                                    stride, stride);     \
133 }                                                                       \
134                                                                         \
135 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src,    \
136                                          ptrdiff_t stride)              \
137 {                                                                       \
138     uint64_t temp[8];                                                   \
139     uint8_t *const half = (uint8_t *) temp;                             \
140     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
141                                                    8, stride);          \
142     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
143                                         stride, 8);                     \
144 }                                                                       \
145                                                                         \
146 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src,    \
147                                          ptrdiff_t stride)              \
148 {                                                                       \
149     uint64_t half[8 + 9];                                               \
150     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
151     uint8_t *const halfHV = (uint8_t *) half;                           \
152     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
153                                                    stride, 9);          \
154     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
155                                         stride, 9);                     \
156     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
157     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
158                                         stride, 8, 8);                  \
159 }                                                                       \
160                                                                         \
161 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src,    \
162                                          ptrdiff_t stride)              \
163 {                                                                       \
164     uint64_t half[8 + 9];                                               \
165     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
166     uint8_t *const halfHV = (uint8_t *) half;                           \
167     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
168                                                    stride, 9);          \
169     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
170                                         stride, 9);                     \
171     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
172     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
173                                         stride, 8, 8);                  \
174 }                                                                       \
175                                                                         \
176 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src,    \
177                                          ptrdiff_t stride)              \
178 {                                                                       \
179     uint64_t half[8 + 9];                                               \
180     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
181     uint8_t *const halfHV = (uint8_t *) half;                           \
182     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
183                                                    stride, 9);          \
184     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
185                                         stride, 9);                     \
186     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
187     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
188                                         stride, 8, 8);                  \
189 }                                                                       \
190                                                                         \
191 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src,    \
192                                          ptrdiff_t stride)              \
193 {                                                                       \
194     uint64_t half[8 + 9];                                               \
195     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
196     uint8_t *const halfHV = (uint8_t *) half;                           \
197     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
198                                                    stride, 9);          \
199     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
200                                         stride, 9);                     \
201     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
202     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
203                                         stride, 8, 8);                  \
204 }                                                                       \
205                                                                         \
206 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src,    \
207                                          ptrdiff_t stride)              \
208 {                                                                       \
209     uint64_t half[8 + 9];                                               \
210     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
211     uint8_t *const halfHV = (uint8_t *) half;                           \
212     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
213                                                    stride, 9);          \
214     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
215     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
216                                         stride, 8, 8);                  \
217 }                                                                       \
218                                                                         \
219 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src,    \
220                                          ptrdiff_t stride)              \
221 {                                                                       \
222     uint64_t half[8 + 9];                                               \
223     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
224     uint8_t *const halfHV = (uint8_t *) half;                           \
225     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
226                                                    stride, 9);          \
227     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
228     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
229                                         stride, 8, 8);                  \
230 }                                                                       \
231                                                                         \
232 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src,    \
233                                          ptrdiff_t stride)              \
234 {                                                                       \
235     uint64_t half[8 + 9];                                               \
236     uint8_t *const halfH = (uint8_t *) half;                            \
237     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
238                                                    stride, 9);          \
239     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
240                                         8, stride, 9);                  \
241     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
242                                                    stride, 8);          \
243 }                                                                       \
244                                                                         \
245 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src,    \
246                                          ptrdiff_t stride)              \
247 {                                                                       \
248     uint64_t half[8 + 9];                                               \
249     uint8_t *const halfH = (uint8_t *) half;                            \
250     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
251                                                    stride, 9);          \
252     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
253                                         stride, 9);                     \
254     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
255                                                    stride, 8);          \
256 }                                                                       \
257                                                                         \
258 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src,    \
259                                          ptrdiff_t stride)              \
260 {                                                                       \
261     uint64_t half[9];                                                   \
262     uint8_t *const halfH = (uint8_t *) half;                            \
263     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
264                                                    stride, 9);          \
265     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
266                                                    stride, 8);          \
267 }                                                                       \
268                                                                         \
269 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src,   \
270                                           ptrdiff_t stride)             \
271 {                                                                       \
272     ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
273 }                                                                       \
274                                                                         \
275 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src,   \
276                                           ptrdiff_t stride)             \
277 {                                                                       \
278     uint64_t temp[32];                                                  \
279     uint8_t *const half = (uint8_t *) temp;                             \
280     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
281                                                     stride, 16);        \
282     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
283                                          stride, 16);                   \
284 }                                                                       \
285                                                                         \
286 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src,   \
287                                           ptrdiff_t stride)             \
288 {                                                                       \
289     ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
290                                                     stride, stride, 16);\
291 }                                                                       \
292                                                                         \
293 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src,   \
294                                           ptrdiff_t stride)             \
295 {                                                                       \
296     uint64_t temp[32];                                                  \
297     uint8_t *const half = (uint8_t*) temp;                              \
298     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
299                                                     stride, 16);        \
300     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
301                                          stride, stride, 16);           \
302 }                                                                       \
303                                                                         \
304 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src,   \
305                                           ptrdiff_t stride)             \
306 {                                                                       \
307     uint64_t temp[32];                                                  \
308     uint8_t *const half = (uint8_t *) temp;                             \
309     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
310                                                     stride);            \
311     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
312                                          stride, 16);                   \
313 }                                                                       \
314                                                                         \
315 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src,   \
316                                           ptrdiff_t stride)             \
317 {                                                                       \
318     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
319                                                     stride, stride);    \
320 }                                                                       \
321                                                                         \
322 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src,   \
323                                           ptrdiff_t stride)             \
324 {                                                                       \
325     uint64_t temp[32];                                                  \
326     uint8_t *const half = (uint8_t *) temp;                             \
327     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
328                                                     stride);            \
329     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
330                                          stride, stride, 16);           \
331 }                                                                       \
332                                                                         \
333 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src,   \
334                                           ptrdiff_t stride)             \
335 {                                                                       \
336     uint64_t half[16 * 2 + 17 * 2];                                     \
337     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
338     uint8_t *const halfHV = (uint8_t *) half;                           \
339     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
340                                                     stride, 17);        \
341     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
342                                          stride, 17);                   \
343     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
344                                                     16, 16);            \
345     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
346                                          stride, 16, 16);               \
347 }                                                                       \
348                                                                         \
349 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src,   \
350                                           ptrdiff_t stride)             \
351 {                                                                       \
352     uint64_t half[16 * 2 + 17 * 2];                                     \
353     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
354     uint8_t *const halfHV = (uint8_t *) half;                           \
355     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
356                                                     stride, 17);        \
357     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
358                                          stride, 17);                   \
359     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
360                                                     16, 16);            \
361     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
362                                          stride, 16, 16);               \
363 }                                                                       \
364                                                                         \
365 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src,   \
366                                           ptrdiff_t stride)             \
367 {                                                                       \
368     uint64_t half[16 * 2 + 17 * 2];                                     \
369     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
370     uint8_t *const halfHV = (uint8_t *) half;                           \
371     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
372                                                     stride, 17);        \
373     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
374                                          stride, 17);                   \
375     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
376                                                     16, 16);            \
377     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
378                                          stride, 16, 16);               \
379 }                                                                       \
380                                                                         \
381 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src,   \
382                                           ptrdiff_t stride)             \
383 {                                                                       \
384     uint64_t half[16 * 2 + 17 * 2];                                     \
385     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
386     uint8_t *const halfHV = (uint8_t *) half;                           \
387     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
388                                                     stride, 17);        \
389     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
390                                          stride, 17);                   \
391     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
392                                                     16, 16);            \
393     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
394                                          stride, 16, 16);               \
395 }                                                                       \
396                                                                         \
397 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src,   \
398                                           ptrdiff_t stride)             \
399 {                                                                       \
400     uint64_t half[16 * 2 + 17 * 2];                                     \
401     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
402     uint8_t *const halfHV = (uint8_t *) half;                           \
403     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
404                                                     stride, 17);        \
405     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
406                                                     16, 16);            \
407     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
408                                          stride, 16, 16);               \
409 }                                                                       \
410                                                                         \
411 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src,   \
412                                           ptrdiff_t stride)             \
413 {                                                                       \
414     uint64_t half[16 * 2 + 17 * 2];                                     \
415     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
416     uint8_t *const halfHV = (uint8_t *) half;                           \
417     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
418                                                     stride, 17);        \
419     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
420                                                     16, 16);            \
421     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
422                                          stride, 16, 16);               \
423 }                                                                       \
424                                                                         \
425 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src,   \
426                                           ptrdiff_t stride)             \
427 {                                                                       \
428     uint64_t half[17 * 2];                                              \
429     uint8_t *const halfH = (uint8_t *) half;                            \
430     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
431                                                     stride, 17);        \
432     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
433                                          stride, 17);                   \
434     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
435                                                     stride, 16);        \
436 }                                                                       \
437                                                                         \
438 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src,   \
439                                           ptrdiff_t stride)             \
440 {                                                                       \
441     uint64_t half[17 * 2];                                              \
442     uint8_t *const halfH = (uint8_t *) half;                            \
443     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
444                                                     stride, 17);        \
445     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
446                                          stride, 17);                   \
447     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
448                                                     stride, 16);        \
449 }                                                                       \
450                                                                         \
451 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \
452                                           ptrdiff_t stride)             \
453 {                                                                       \
454     uint64_t half[17 * 2];                                              \
455     uint8_t *const halfH = (uint8_t *) half;                            \
456     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
457                                                     stride, 17);        \
458     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
459                                                     stride, 16);        \
460 }
461
462 QPEL_OP(put_,        _,        mmxext)
463 QPEL_OP(avg_,        _,        mmxext)
464 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
465
466 #endif /* HAVE_YASM */
467
/*
 * Fill the 16 entries of c->PFX_pixels_tab[IDX].
 *
 * Entry X + 4 * Y receives the identifier formed by pasting
 * PREFIX, PFX, SIZE, "_mcXY_" and CPU together, for X and Y in 0..3
 * (so [1] is mc10, [4] is mc01, [15] is mc33). PREFIX may be empty.
 *
 * `c` is not a macro parameter: it is picked up from the scope in which
 * the macro is expanded.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
do {                                                                         \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
487
488 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
489 {
490     int cpu_flags = av_get_cpu_flags();
491
492     if (X86_MMXEXT(cpu_flags)) {
493 #if HAVE_MMXEXT_EXTERNAL
494         SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
495         SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
496
497         SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
498         SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
499         SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
500         SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
501 #endif /* HAVE_MMXEXT_EXTERNAL */
502     }
503 }