2 * quarterpel DSP functions
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/pixels.h"
29 #include "libavcodec/qpeldsp.h"
/*
 * Prototypes of the MMXEXT primitives called by the QPEL_OP wrappers in this
 * file. None of them is defined in the visible part of this file; presumably
 * they are implemented in external assembly (see the HAVE_YASM guard further
 * down) -- TODO confirm against the build.
 *
 *   *_pixels{8,16}_l2_*           combine two sources (src1, src2) into dst.
 *                                 Only dst and src1 have a stride parameter;
 *                                 every caller in this file passes a scratch
 *                                 buffer as src2. h is a row count
 *                                 (presumably; callers pass 8 or 16).
 *   *_mpeg4_qpel{8,16}_h_lowpass_* dst/src with separate strides and a
 *                                 row count h.
 *   *_mpeg4_qpel{8,16}_v_lowpass_* dst/src with separate strides; no row
 *                                 count parameter.
 *
 * Each primitive exists in a put_, avg_ and put_no_rnd_ flavour, matching
 * the three QPEL_OP instantiations.
 */
32 void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
33 int dstStride, int src1Stride, int h);
34 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
35 uint8_t *src2, int dstStride,
36 int src1Stride, int h);
37 void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
38 int dstStride, int src1Stride, int h);
39 void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
40 int dstStride, int src1Stride, int h);
41 void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
42 int dstStride, int src1Stride, int h);
43 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
44 int dstStride, int src1Stride, int h);
45 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
46 int dstStride, int srcStride, int h);
47 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
48 int dstStride, int srcStride, int h);
49 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
50 int dstStride, int srcStride,
52 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
53 int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
55 int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
57 int dstStride, int srcStride,
59 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
60 int dstStride, int srcStride);
61 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
62 int dstStride, int srcStride);
63 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
64 int dstStride, int srcStride);
65 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
66 int dstStride, int srcStride);
67 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
68 int dstStride, int srcStride);
69 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
70 int dstStride, int srcStride);
/*
 * Name aliases needed by the mc00 wrappers: QPEL_OP pastes
 * ff_ ## OPNAME ## pixels{8,16}_ ## MMX, which with MMX = mmxext yields
 * ff_put_pixelsN_mmxext / ff_put_no_rnd_pixelsN_mmxext. Both spellings are
 * redirected to the plain MMX put routines here.
 * NOTE(review): sharing one routine for put_ and put_no_rnd_ presumably
 * relies on a straight copy involving no rounding -- confirm.
 */
71 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
72 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
76 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
77 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
/*
 * QPEL_OP(OPNAME, RND, MMX): generate the 16 quarter-pel wrappers
 * OPNAME qpel8_mcXY_MMX and the 16 wrappers OPNAME qpel16_mcXY_MMX
 * (X, Y in 0..3). Each takes dst and src; the bodies also use a `stride`
 * argument.
 *
 *   OPNAME  pasted into every generated function name and into the call
 *           that writes dst (ff_ ## OPNAME ## ...).
 *   RND     pasted between "ff_put" and the helper name for every
 *           intermediate pass that writes a scratch buffer.
 *   MMX     suffix of both the generated wrappers and the helpers called.
 *
 * How the cases are composed (N = 8 or 16):
 *   mc00        one call to the pixelsN helper, passing stride and N.
 *   mc20, mc02  a single h_lowpass / v_lowpass call straight into dst.
 *   mc10, mc30  h_lowpass into a scratch buffer, then pixelsN_l2 of that
 *               buffer with src (mc10) or src + 1 (mc30).
 *   mc01, mc03  v_lowpass into a scratch buffer, then pixelsN_l2 of that
 *               buffer with src (mc01) or src + stride (mc03).
 *   mc11, mc31, mc13, mc33
 *               h_lowpass into halfH; pixelsN_l2 of halfH with src
 *               (mc1Y) or src + 1 (mc3Y), written back into halfH;
 *               v_lowpass of halfH into halfHV; finally pixelsN_l2 of
 *               halfHV with halfH (mcX1) or halfH + N (mcX3) into dst.
 *   mc21, mc23  same as above without the intermediate l2 step.
 *   mc12, mc32  h_lowpass into halfH; pixelsN_l2 with src (mc12) or
 *               src + 1 (mc32) back into halfH; v_lowpass into dst.
 *   mc22        h_lowpass into halfH, then v_lowpass of halfH into dst.
 *
 * In the two-buffer cases halfHV sits at the start of the uint64_t scratch
 * array and halfH starts at byte offset 64 (N = 8) or 256 (N = 16). halfH
 * is written with a row pitch of N bytes (the dstStride passed to
 * h_lowpass), so halfH + N addresses its second row.
 * Presumably halfH holds N + 1 rows, matching the "9" / "17 * 2" terms in
 * the array sizes -- TODO confirm against the row count passed to
 * h_lowpass.
 */
79 #define QPEL_OP(OPNAME, RND, MMX) \
80 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
83 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
86 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
90 uint8_t *const half = (uint8_t *) temp; \
91 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
93 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
97 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
100 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
104 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
108 uint8_t *const half = (uint8_t *) temp; \
109 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
111 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
115 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
119 uint8_t *const half = (uint8_t *) temp; \
120 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
122 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
123 stride, stride, 8); \
126 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
129 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
133 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
137 uint8_t *const half = (uint8_t *) temp; \
138 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
140 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
144 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
147 uint64_t half[8 + 9]; \
148 uint8_t *const halfH = (uint8_t *) half + 64; \
149 uint8_t *const halfHV = (uint8_t *) half; \
150 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
152 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
154 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
155 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
159 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
162 uint64_t half[8 + 9]; \
163 uint8_t *const halfH = (uint8_t *) half + 64; \
164 uint8_t *const halfHV = (uint8_t *) half; \
165 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
167 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
169 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
170 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
174 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
177 uint64_t half[8 + 9]; \
178 uint8_t *const halfH = (uint8_t *) half + 64; \
179 uint8_t *const halfHV = (uint8_t *) half; \
180 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
182 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
184 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
185 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
189 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
192 uint64_t half[8 + 9]; \
193 uint8_t *const halfH = (uint8_t *) half + 64; \
194 uint8_t *const halfHV = (uint8_t *) half; \
195 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
197 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
199 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
200 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
204 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
207 uint64_t half[8 + 9]; \
208 uint8_t *const halfH = (uint8_t *) half + 64; \
209 uint8_t *const halfHV = (uint8_t *) half; \
210 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
212 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
213 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
217 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
220 uint64_t half[8 + 9]; \
221 uint8_t *const halfH = (uint8_t *) half + 64; \
222 uint8_t *const halfHV = (uint8_t *) half; \
223 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
225 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
226 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
230 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
233 uint64_t half[8 + 9]; \
234 uint8_t *const halfH = (uint8_t *) half; \
235 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
237 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
239 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
243 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
246 uint64_t half[8 + 9]; \
247 uint8_t *const halfH = (uint8_t *) half; \
248 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
250 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
252 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
256 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
260 uint8_t *const halfH = (uint8_t *) half; \
261 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
263 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
267 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
270 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
273 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
277 uint8_t *const half = (uint8_t *) temp; \
278 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
280 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
284 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
287 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
288 stride, stride, 16);\
291 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
295 uint8_t *const half = (uint8_t*) temp; \
296 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
298 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
299 stride, stride, 16); \
302 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
306 uint8_t *const half = (uint8_t *) temp; \
307 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
309 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
313 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
316 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
320 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
324 uint8_t *const half = (uint8_t *) temp; \
325 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
327 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
328 stride, stride, 16); \
331 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
334 uint64_t half[16 * 2 + 17 * 2]; \
335 uint8_t *const halfH = (uint8_t *) half + 256; \
336 uint8_t *const halfHV = (uint8_t *) half; \
337 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
339 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
341 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
343 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
347 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
350 uint64_t half[16 * 2 + 17 * 2]; \
351 uint8_t *const halfH = (uint8_t *) half + 256; \
352 uint8_t *const halfHV = (uint8_t *) half; \
353 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
355 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
357 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
359 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
363 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
366 uint64_t half[16 * 2 + 17 * 2]; \
367 uint8_t *const halfH = (uint8_t *) half + 256; \
368 uint8_t *const halfHV = (uint8_t *) half; \
369 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
371 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
373 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
375 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
379 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
382 uint64_t half[16 * 2 + 17 * 2]; \
383 uint8_t *const halfH = (uint8_t *) half + 256; \
384 uint8_t *const halfHV = (uint8_t *) half; \
385 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
387 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
389 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
391 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
395 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
398 uint64_t half[16 * 2 + 17 * 2]; \
399 uint8_t *const halfH = (uint8_t *) half + 256; \
400 uint8_t *const halfHV = (uint8_t *) half; \
401 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
403 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
405 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
409 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
412 uint64_t half[16 * 2 + 17 * 2]; \
413 uint8_t *const halfH = (uint8_t *) half + 256; \
414 uint8_t *const halfHV = (uint8_t *) half; \
415 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
417 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
419 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
423 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
426 uint64_t half[17 * 2]; \
427 uint8_t *const halfH = (uint8_t *) half; \
428 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
430 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
432 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
436 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
439 uint64_t half[17 * 2]; \
440 uint8_t *const halfH = (uint8_t *) half; \
441 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
443 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
445 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
449 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
452 uint64_t half[17 * 2]; \
453 uint8_t *const halfH = (uint8_t *) half; \
454 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
456 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
/*
 * Instantiate the qpel8/qpel16 mcXY wrappers for the three operation
 * families. For put_ and avg_ the RND argument is a bare "_", so scratch
 * passes use the ff_put_* helpers; for put_no_rnd_ it is "_no_rnd_", so
 * scratch passes use the ff_put_no_rnd_* helpers. In every case only the
 * final write to dst uses the OPNAME-specific helper.
 */
460 QPEL_OP(put_, _, mmxext)
461 QPEL_OP(avg_, _, mmxext)
462 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
464 #endif /* HAVE_YASM */
/*
 * SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX): fill the 16 entries of
 * c->PFX_pixels_tab[IDX] with the functions PREFIX PFX SIZE _mcXY_ CPU.
 *
 *   PFX     table/function family, e.g. put_qpel, avg_qpel, put_no_rnd_qpel.
 *   IDX     first index into the table (the init code in this file uses
 *           0 with SIZE 16 and 1 with SIZE 8).
 *   SIZE    block size pasted into the function name.
 *   CPU     function name suffix, e.g. mmxext.
 *   PREFIX  optional leading name component (empty in the uses in this file).
 *
 * The slot for mcXY is X + 4 * Y (mc10 -> 1, mc01 -> 4, mc33 -> 15).
 * The expansion refers to a variable named `c`, which must be in scope at
 * the point of use.
 */
466 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
468 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
469 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
470 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
471 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
472 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
473 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
474 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
475 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
476 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
477 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
478 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
479 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
480 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
481 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
482 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
483 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
486 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
488 int cpu_flags = av_get_cpu_flags();
490 if (X86_MMXEXT(cpu_flags)) {
491 #if HAVE_MMXEXT_EXTERNAL
492 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
493 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
495 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
496 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
497 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
498 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
499 #endif /* HAVE_MMXEXT_EXTERNAL */