2 * quarterpel DSP functions
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
35 const uint8_t *src1, const uint8_t *src2,
36 int dstStride, int src1Stride, int h);
37 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
38 const uint8_t *src1, const uint8_t *src2,
39 int dstStride, int src1Stride, int h);
40 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
41 const uint8_t *src1, const uint8_t *src2,
42 int dstStride, int src1Stride, int h);
43 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
44 const uint8_t *src1, const uint8_t *src2,
45 int dstStride, int src1Stride, int h);
46 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
47 const uint8_t *src1, const uint8_t *src2,
48 int dstStride, int src1Stride, int h);
49 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
50 const uint8_t *src1, const uint8_t *src2,
51 int dstStride, int src1Stride, int h);
52 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53 int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
55 int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
58 int dstStride, int srcStride,
60 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61 int dstStride, int srcStride, int h);
62 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
63 int dstStride, int srcStride, int h);
64 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
66 int dstStride, int srcStride,
68 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69 int dstStride, int srcStride);
70 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
71 int dstStride, int srcStride);
72 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
74 int dstStride, int srcStride);
75 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76 int dstStride, int srcStride);
77 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
78 int dstStride, int srcStride);
79 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
81 int dstStride, int srcStride);
82 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
83 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
87 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
88 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
90 #define QPEL_OP(OPNAME, RND, MMX) \
91 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
95 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
98 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
103 uint8_t *const half = (uint8_t *) temp; \
104 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
106 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
107 stride, stride, 8); \
110 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
111 const uint8_t *src, \
114 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
118 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
119 const uint8_t *src, \
123 uint8_t *const half = (uint8_t *) temp; \
124 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
126 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
130 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
131 const uint8_t *src, \
135 uint8_t *const half = (uint8_t *) temp; \
136 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
138 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
139 stride, stride, 8); \
142 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
143 const uint8_t *src, \
146 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
150 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
151 const uint8_t *src, \
155 uint8_t *const half = (uint8_t *) temp; \
156 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
158 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
162 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
163 const uint8_t *src, \
166 uint64_t half[8 + 9]; \
167 uint8_t *const halfH = (uint8_t *) half + 64; \
168 uint8_t *const halfHV = (uint8_t *) half; \
169 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
171 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
173 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
174 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
178 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
179 const uint8_t *src, \
182 uint64_t half[8 + 9]; \
183 uint8_t *const halfH = (uint8_t *) half + 64; \
184 uint8_t *const halfHV = (uint8_t *) half; \
185 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
187 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
189 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
190 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
194 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
195 const uint8_t *src, \
198 uint64_t half[8 + 9]; \
199 uint8_t *const halfH = (uint8_t *) half + 64; \
200 uint8_t *const halfHV = (uint8_t *) half; \
201 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
203 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
205 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
206 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
210 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
211 const uint8_t *src, \
214 uint64_t half[8 + 9]; \
215 uint8_t *const halfH = (uint8_t *) half + 64; \
216 uint8_t *const halfHV = (uint8_t *) half; \
217 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
219 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
221 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
222 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
226 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
227 const uint8_t *src, \
230 uint64_t half[8 + 9]; \
231 uint8_t *const halfH = (uint8_t *) half + 64; \
232 uint8_t *const halfHV = (uint8_t *) half; \
233 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
235 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
236 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
240 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
241 const uint8_t *src, \
244 uint64_t half[8 + 9]; \
245 uint8_t *const halfH = (uint8_t *) half + 64; \
246 uint8_t *const halfHV = (uint8_t *) half; \
247 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
249 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
250 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
254 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
255 const uint8_t *src, \
258 uint64_t half[8 + 9]; \
259 uint8_t *const halfH = (uint8_t *) half; \
260 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
262 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
264 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
268 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
269 const uint8_t *src, \
272 uint64_t half[8 + 9]; \
273 uint8_t *const halfH = (uint8_t *) half; \
274 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
276 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
278 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
282 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
283 const uint8_t *src, \
287 uint8_t *const halfH = (uint8_t *) half; \
288 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
290 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
294 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
295 const uint8_t *src, \
298 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
301 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
302 const uint8_t *src, \
306 uint8_t *const half = (uint8_t *) temp; \
307 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
309 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
313 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
314 const uint8_t *src, \
317 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
318 stride, stride, 16);\
321 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
322 const uint8_t *src, \
326 uint8_t *const half = (uint8_t*) temp; \
327 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
329 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
330 stride, stride, 16); \
333 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
334 const uint8_t *src, \
338 uint8_t *const half = (uint8_t *) temp; \
339 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
341 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
345 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
346 const uint8_t *src, \
349 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
353 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
354 const uint8_t *src, \
358 uint8_t *const half = (uint8_t *) temp; \
359 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
361 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
362 stride, stride, 16); \
365 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
366 const uint8_t *src, \
369 uint64_t half[16 * 2 + 17 * 2]; \
370 uint8_t *const halfH = (uint8_t *) half + 256; \
371 uint8_t *const halfHV = (uint8_t *) half; \
372 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
374 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
376 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
378 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
382 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
383 const uint8_t *src, \
386 uint64_t half[16 * 2 + 17 * 2]; \
387 uint8_t *const halfH = (uint8_t *) half + 256; \
388 uint8_t *const halfHV = (uint8_t *) half; \
389 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
391 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
393 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
395 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
399 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
400 const uint8_t *src, \
403 uint64_t half[16 * 2 + 17 * 2]; \
404 uint8_t *const halfH = (uint8_t *) half + 256; \
405 uint8_t *const halfHV = (uint8_t *) half; \
406 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
408 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
410 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
412 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
416 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
417 const uint8_t *src, \
420 uint64_t half[16 * 2 + 17 * 2]; \
421 uint8_t *const halfH = (uint8_t *) half + 256; \
422 uint8_t *const halfHV = (uint8_t *) half; \
423 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
425 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
427 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
429 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
433 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
434 const uint8_t *src, \
437 uint64_t half[16 * 2 + 17 * 2]; \
438 uint8_t *const halfH = (uint8_t *) half + 256; \
439 uint8_t *const halfHV = (uint8_t *) half; \
440 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
442 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
444 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
448 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
449 const uint8_t *src, \
452 uint64_t half[16 * 2 + 17 * 2]; \
453 uint8_t *const halfH = (uint8_t *) half + 256; \
454 uint8_t *const halfHV = (uint8_t *) half; \
455 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
457 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
459 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
463 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
464 const uint8_t *src, \
467 uint64_t half[17 * 2]; \
468 uint8_t *const halfH = (uint8_t *) half; \
469 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
471 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
473 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
477 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
478 const uint8_t *src, \
481 uint64_t half[17 * 2]; \
482 uint8_t *const halfH = (uint8_t *) half; \
483 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
485 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
487 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
491 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
492 const uint8_t *src, \
495 uint64_t half[17 * 2]; \
496 uint8_t *const halfH = (uint8_t *) half; \
497 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
499 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
503 QPEL_OP(put_, _, mmxext)
504 QPEL_OP(avg_, _, mmxext)
505 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
507 #endif /* HAVE_X86ASM */
509 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
511 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
512 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
513 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
514 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
515 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
516 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
517 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
518 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
519 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
520 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
521 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
522 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
523 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
524 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
525 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
526 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
529 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
531 int cpu_flags = av_get_cpu_flags();
533 if (X86_MMXEXT(cpu_flags)) {
534 #if HAVE_MMXEXT_EXTERNAL
535 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
536 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
538 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
539 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
540 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
541 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
542 #endif /* HAVE_MMXEXT_EXTERNAL */