/*
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "attributes.h"
#include "mpeg2_internal.h"

/* MMX code - needs a rewrite */

/*
 * Motion Compensation frequently needs to average values using the
 * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
 * to compute this, but it's been left out of classic MMX.
 *
 * We need to be careful of overflows when doing this computation.
 * Rather than unpacking data to 16 bits, which reduces parallelism,
 * we use the following formulas:
 *
 * (x+y)>>1 == (x&y)+((x^y)>>1)
 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
 */
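
/*
 * A scalar sketch of the two identities above (illustration only, not
 * part of the MMX paths; the helper names are ours).  Each works on a
 * single byte without widening; the MMX code below applies the same
 * trick to eight bytes at once, pre-masking with 0xfe so the 64-bit
 * shift cannot leak bits between neighbouring bytes.
 */
static inline uint8_t scalar_avg_down (uint8_t x, uint8_t y)
{
    return (uint8_t) ((x & y) + ((x ^ y) >> 1));    /* (x+y)>>1 */
}

static inline uint8_t scalar_avg_up (uint8_t x, uint8_t y)
{
    return (uint8_t) ((x | y) - ((x ^ y) >> 1));    /* (x+y+1)>>1 */
}
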
/* some rounding constants */
static mmx_t mask1 = {0xfefefefefefefefeLL};
static mmx_t round4 = {0x0002000200020002LL};

/*
 * This code should probably be compiled with loop unrolling
 * (i.e., -funroll-loops in gcc) because some of the loops
 * use a small static number of iterations. This was written
 * with the assumption that the compiler knows best about when
 * unrolling will help.
 */

static inline void mmx_zero_reg ()

static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
        const uint8_t * src2)
{
    /* *dest = (*src1 + *src2 + 1)/ 2; */

    movq_m2r (*src1, mm1); /* load 8 src1 bytes */
    movq_r2r (mm1, mm2); /* copy 8 src1 bytes */

    movq_m2r (*src2, mm3); /* load 8 src2 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src2 bytes */

    pxor_r2r (mm1, mm3); /* xor src1 and src2 */
    pand_m2r (mask1, mm3); /* mask lower bits */
    psrlq_i2r (1, mm3); /* /2 */
    por_r2r (mm2, mm4); /* or src1 and src2 */
    psubb_r2r (mm3, mm4); /* subtract subresults */
    movq_r2m (mm4, *dest); /* store result in dest */
}

static inline void mmx_interp_average_2_U8 (uint8_t * dest,
        const uint8_t * src1, const uint8_t * src2)
{
    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */

    movq_m2r (*dest, mm1); /* load 8 dest bytes */
    movq_r2r (mm1, mm2); /* copy 8 dest bytes */

    movq_m2r (*src1, mm3); /* load 8 src1 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src1 bytes */

    movq_m2r (*src2, mm5); /* load 8 src2 bytes */
    movq_r2r (mm5, mm6); /* copy 8 src2 bytes */

    pxor_r2r (mm3, mm5); /* xor src1 and src2 */
    pand_m2r (mask1, mm5); /* mask lower bits */
    psrlq_i2r (1, mm5); /* /2 */
    por_r2r (mm4, mm6); /* or src1 and src2 */
    psubb_r2r (mm5, mm6); /* subtract subresults */
    movq_r2r (mm6, mm5); /* copy subresult */

    pxor_r2r (mm1, mm5); /* xor srcavg and dest */
    pand_m2r (mask1, mm5); /* mask lower bits */
    psrlq_i2r (1, mm5); /* /2 */
    por_r2r (mm2, mm6); /* or srcavg and dest */
    psubb_r2r (mm5, mm6); /* subtract subresults */
    movq_r2m (mm6, *dest); /* store result in dest */
}
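
/*
 * Scalar sketch of one byte of the interpolating average above
 * (illustration only, reusing the hypothetical helpers defined after
 * the formula comment at the top of the file).
 */
static inline uint8_t scalar_interp_avg_2 (uint8_t dest, uint8_t src1,
        uint8_t src2)
{
    return scalar_avg_up (dest, scalar_avg_up (src1, src2));
}
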
static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
        const uint8_t * src2,
        const uint8_t * src3,
        const uint8_t * src4)
{
    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */

    movq_m2r (*src1, mm1); /* load 8 src1 bytes */
    movq_r2r (mm1, mm2); /* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */

    movq_m2r (*src2, mm3); /* load 8 src2 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */

    paddw_r2r (mm3, mm1); /* add lows */
    paddw_r2r (mm4, mm2); /* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3); /* load 8 src3 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */

    paddw_r2r (mm3, mm1); /* add lows */
    paddw_r2r (mm4, mm2); /* add highs */

    movq_m2r (*src4, mm5); /* load 8 src4 bytes */
    movq_r2r (mm5, mm6); /* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */

    paddw_r2r (mm5, mm1); /* add lows */
    paddw_r2r (mm6, mm2); /* add highs */

    /* now have subtotal in mm1 and mm2 */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1); /* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2); /* /4 */

    packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */
    movq_r2m (mm1, *dest); /* store result in dest */
}
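
/*
 * Scalar sketch of one byte of the 4-input rounded average (illustration
 * only): here the sum is simply widened, mirroring the unpack-to-16-bit
 * strategy the MMX code above uses instead of the byte-wise bit trick.
 */
static inline uint8_t scalar_avg_4 (uint8_t s1, uint8_t s2, uint8_t s3,
        uint8_t s4)
{
    return (uint8_t) ((s1 + s2 + s3 + s4 + 2) >> 2);
}
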
static inline void mmx_interp_average_4_U8 (uint8_t * dest,
        const uint8_t * src1,
        const uint8_t * src2,
        const uint8_t * src3,
        const uint8_t * src4)
{
    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */

    movq_m2r (*src1, mm1); /* load 8 src1 bytes */
    movq_r2r (mm1, mm2); /* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */

    movq_m2r (*src2, mm3); /* load 8 src2 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */

    paddw_r2r (mm3, mm1); /* add lows */
    paddw_r2r (mm4, mm2); /* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3); /* load 8 src3 bytes */
    movq_r2r (mm3, mm4); /* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */

    paddw_r2r (mm3, mm1); /* add lows */
    paddw_r2r (mm4, mm2); /* add highs */

    movq_m2r (*src4, mm5); /* load 8 src4 bytes */
    movq_r2r (mm5, mm6); /* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */

    paddw_r2r (mm5, mm1); /* add lows */
    paddw_r2r (mm6, mm2); /* add highs */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1); /* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2); /* /4 */

    /* now have subtotal/4 in mm1 and mm2 */

    movq_m2r (*dest, mm3); /* load 8 dest bytes */
    movq_r2r (mm3, mm4); /* copy 8 dest bytes */

    packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */
    movq_r2r (mm1, mm2); /* copy subresult */

    pxor_r2r (mm1, mm3); /* xor srcavg and dest */
    pand_m2r (mask1, mm3); /* mask lower bits */
    psrlq_i2r (1, mm3); /* /2 */
    por_r2r (mm2, mm4); /* or srcavg and dest */
    psubb_r2r (mm3, mm4); /* subtract subresults */
    movq_r2m (mm4, *dest); /* store result in dest */
}

/*-----------------------------------------------------------------------*/
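
/*
 * Naming scheme for the motion-compensation entry points below:
 * MC_put_* stores the prediction into dest, MC_avg_* averages it with
 * what dest already holds; the o/x/y/xy suffix selects full-pel or
 * half-pel offsets in the x and/or y direction; 16/8 is the block
 * width.  Each inner helper walks `height' rows spaced `stride' bytes
 * apart, with the width argument presumably gating the second, dest+8
 * call used only for 16-wide blocks.
 */
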
static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    mmx_average_2_U8 (dest, dest, ref);

    mmx_average_2_U8 (dest+8, dest+8, ref+8);

static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_mmx (16, height, dest, ref, stride);
}

static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    movq_m2r (*ref, mm1); /* load 8 ref bytes */
    movq_r2m (mm1, *dest); /* store 8 bytes at curr */

    movq_m2r (*(ref+8), mm1); /* load 8 ref bytes */
    movq_r2m (mm1, *(dest+8)); /* store 8 bytes at curr */

static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_mmx (16, height, dest, ref, stride);
}

static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

/* Half pixel interpolation in the x direction */
static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    mmx_interp_average_2_U8 (dest, ref, ref+1);

    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);

static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_x_mmx (16, height, dest, ref, stride);
}

static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_x_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    mmx_average_2_U8 (dest, ref, ref+1);

    mmx_average_2_U8 (dest+8, ref+8, ref+9);

static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_x_mmx (16, height, dest, ref, stride);
}

static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_x_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    const uint8_t * ref_next = ref + stride;

    mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);

    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
            ref_next+8, ref_next+9);

static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_xy_mmx (16, height, dest, ref, stride);
}

static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_xy_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    const uint8_t * ref_next = ref + stride;

    mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);

    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);

static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_xy_mmx (16, height, dest, ref, stride);
}

static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_xy_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    const uint8_t * ref_next = ref + stride;

    mmx_interp_average_2_U8 (dest, ref, ref_next);

    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);

static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_y_mmx (16, height, dest, ref, stride);
}

static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg_y_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/

static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
        const uint8_t * ref, const int stride)
    const uint8_t * ref_next = ref + stride;

    mmx_average_2_U8 (dest, ref, ref_next);

    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);

static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_y_mmx (16, height, dest, ref, stride);
}

static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put_y_mmx (8, height, dest, ref, stride);
}
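
/*
 * MPEG2_MC_EXTERN comes from mpeg2_internal.h; each invocation below is
 * expected to collect the corresponding MC_{put,avg}_{o,x,y,xy}_{16,8}_*
 * functions into the motion-compensation table exported for that CPU
 * variant.
 */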
MPEG2_MC_EXTERN (mmx)

/* CPU_MMXEXT/CPU_3DNOW adaptation layer */

#define pavg_r2r(src,dest) \
do { \
    if (cpu == CPU_MMXEXT) \
        pavgb_r2r (src, dest); \
    else \
        pavgusb_r2r (src, dest); \
} while (0)

#define pavg_m2r(src,dest) \
do { \
    if (cpu == CPU_MMXEXT) \
        pavgb_m2r (src, dest); \
    else \
        pavgusb_m2r (src, dest); \
} while (0)
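
/*
 * The `cpu' argument of the MC_* helpers below is always a compile-time
 * constant (CPU_MMXEXT or CPU_3DNOW), so once the static inline helpers
 * are expanded the branch in pavg_r2r/pavg_m2r should fold away, leaving
 * only the matching pavgb or pavgusb instruction.
 */
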
/* CPU_MMXEXT code */

static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride)
    movq_m2r (*ref, mm0);
    movq_r2m (mm0, *dest);

static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);

    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));

static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    pavg_m2r (*dest, mm0);

    movq_r2m (mm0, *dest);

static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*dest, mm0);
    pavg_m2r (*(dest+8), mm1);
    movq_r2m (mm0, *dest);

    movq_r2m (mm1, *(dest+8));

static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int offset,
        const int cpu)
    movq_m2r (*ref, mm0);
    pavg_m2r (*(ref+offset), mm0);

    movq_r2m (mm0, *dest);

static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int offset,
        const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*(ref+offset+8), mm1);
    movq_r2m (mm0, *dest);

    movq_r2m (mm1, *(dest+8));

static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int offset,
        const int cpu)
    movq_m2r (*ref, mm0);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*dest, mm0);

    movq_r2m (mm0, *dest);

static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int offset,
        const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*(ref+offset+8), mm1);
    pavg_m2r (*dest, mm0);
    pavg_m2r (*(dest+8), mm1);

    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));

static mmx_t mask_one = {0x0101010101010101LL};
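
/*
 * The pavg instruction rounds every pairwise average up, so building the
 * four-pixel (xy half-pel) average out of cascaded pavg operations can
 * come out one too high.  The mask_one / psubusb corrections in the
 * helpers below compensate for that bias, keeping the result at
 * (a+b+c+d+2)>>2.
 */
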
static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+1), mm1);

    movq_m2r (*ref, mm2);

    movq_m2r (*(ref+1), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);

    movq_r2m (mm0, *dest);

    movq_r2r (mm6, mm7); /* unroll ! */
    movq_r2r (mm2, mm0); /* unroll ! */

static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);

    movq_m2r (*(ref+1), mm2);

    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);
    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);

    movq_m2r (*(ref+9), mm2);

    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);

    movq_r2m (mm0, *(dest+8));

static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);

    movq_m2r (*(ref+1), mm2);

    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);

    movq_r2m (mm0, *dest);

static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
        const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);

    movq_m2r (*(ref+1), mm2);

    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);

    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);

    movq_m2r (*(ref+9), mm2);

    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);

    psubusb_r2r (mm7, mm0);
    movq_m2r (*(dest+8), mm1);

    movq_r2m (mm0, *(dest+8));

static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
}

MPEG2_MC_EXTERN (mmxext)

static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
        int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
}

MPEG2_MC_EXTERN (3dnow)