1 /*****************************************************************************
2 * motionmmxext.c : MMX EXT motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionmmxext.c,v 1.20 2002/07/31 20:56:52 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
35 /*****************************************************************************
37 *****************************************************************************/
/* Forward declaration of the callback that is registered through
 * set_callbacks() in the module descriptor. */
38 static int Open ( vlc_object_t * );
40 /*****************************************************************************
42 *****************************************************************************/
/* Module descriptor entries: a translatable description string, the
 * capability string "motion compensation" with the value 200 (presumably the
 * module's score/priority -- confirm against the module API), the MMXEXT
 * requirement, the "mmxext" shortcut, and Open as the first callback with
 * NULL as the second one. */
44 set_description( _("MMX EXT motion compensation module") );
45 set_capability( "motion compensation", 200 );
46 add_requirement( MMXEXT );
47 add_shortcut( "mmxext" );
48 set_callbacks( Open, NULL );
51 /*****************************************************************************
52 * Motion compensation in MMXEXT (this code also serves 3DNow! through the cpu argument, which is admittedly inelegant)
53 *****************************************************************************/
59 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* Register-to-register byte average with the instruction chosen at run time:
 * pavgb_r2r when cpu == CPU_MMXEXT, pavgusb_r2r for the other case
 * (presumably CPU_3DNOW). Relies on a variable named `cpu` being in scope at
 * the point of expansion; it is not a macro parameter. */
61 #define pavg_r2r(src,dest) \
63 if (cpu == CPU_MMXEXT) \
64 pavgb_r2r (src, dest); \
66 pavgusb_r2r (src, dest); \
/* Memory-to-register byte average with the instruction chosen at run time:
 * pavgb_m2r when cpu == CPU_MMXEXT, pavgusb_m2r for the other case
 * (presumably CPU_3DNOW). Relies on a variable named `cpu` being in scope at
 * the point of expansion; it is not a macro parameter. */
69 #define pavg_m2r(src,dest) \
71 if (cpu == CPU_MMXEXT) \
72 pavgb_m2r (src, dest); \
74 pavgusb_m2r (src, dest); \
/* Copy helper used by the MC_put_8_* wrappers: the visible step stores the
 * 64-bit register mm0 to *dest.
 * NOTE(review): the load into mm0 and the row loop driven by height/stride
 * are not visible here -- confirm against the complete function. */
81 static inline void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
86 movq_r2m (mm0, *dest);
/* Copy helper used by the MC_put_16_* wrappers: the second 64-bit half is
 * loaded from ref+8 into mm1, then mm0 and mm1 are stored to dest and dest+8.
 * NOTE(review): the load of the first half and the row loop are not visible
 * here -- confirm against the complete function. */
92 static inline void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
97 movq_m2r (*(ref+8), mm1);
99 movq_r2m (mm0, *dest);
100 movq_r2m (mm1, *(dest+8));
/* Averaging helper used by the MC_avg_8_* wrappers: loads 64 bits from ref,
 * averages them byte-wise with the current contents of dest (pavg_m2r picks
 * the instruction from cpu) and writes the result back to dest.
 * NOTE(review): pointer updates and loop control are not visible here. */
105 static inline void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
109 movq_m2r (*ref, mm0);
110 pavg_m2r (*dest, mm0);
112 movq_r2m (mm0, *dest);
/* Averaging helper used by the MC_avg_16_* wrappers: loads two 64-bit halves
 * from ref and ref+8, averages each byte-wise with the current contents of
 * dest and dest+8 (pavg_m2r picks the instruction from cpu) and writes both
 * halves back.
 * NOTE(review): pointer updates and loop control are not visible here. */
117 static inline void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
121 movq_m2r (*ref, mm0);
122 movq_m2r (*(ref+8), mm1);
123 pavg_m2r (*dest, mm0);
124 pavg_m2r (*(dest+8), mm1);
125 movq_r2m (mm0, *dest);
127 movq_r2m (mm1, *(dest+8));
/* Two-sample interpolating copy, one 64-bit group per step: averages the
 * bytes at ref with the bytes at ref+offset and stores the result to dest.
 * The callers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); cpu selects the averaging instruction via pavg_m2r.
 * NOTE(review): pointer updates and loop control are not visible here. */
132 static inline void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
133 int stride, int offset, int cpu)
136 movq_m2r (*ref, mm0);
137 pavg_m2r (*(ref+offset), mm0);
139 movq_r2m (mm0, *dest);
/* Two-sample interpolating copy, two 64-bit groups per step: averages the
 * bytes at ref / ref+8 with those at ref+offset / ref+offset+8 and stores the
 * results to dest / dest+8. The callers in this file pass offset = 1
 * (x variants) or offset = stride (y variants); cpu selects the averaging
 * instruction via pavg_m2r.
 * NOTE(review): pointer updates and loop control are not visible here. */
144 static inline void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
145 int stride, int offset, int cpu)
148 movq_m2r (*ref, mm0);
149 movq_m2r (*(ref+8), mm1);
150 pavg_m2r (*(ref+offset), mm0);
151 pavg_m2r (*(ref+offset+8), mm1);
152 movq_r2m (mm0, *dest);
154 movq_r2m (mm1, *(dest+8));
/* Two-sample interpolation followed by an average with the destination, one
 * 64-bit group per step: averages ref with ref+offset, then averages that
 * result with the current contents of dest and writes it back. Note that
 * this is two cascaded pavg operations, each with its own rounding.
 * The callers in this file pass offset = 1 or offset = stride.
 * NOTE(review): pointer updates and loop control are not visible here. */
159 static inline void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
160 int stride, int offset, int cpu)
163 movq_m2r (*ref, mm0);
164 pavg_m2r (*(ref+offset), mm0);
165 pavg_m2r (*dest, mm0);
167 movq_r2m (mm0, *dest);
/* Two-sample interpolation followed by an average with the destination, two
 * 64-bit groups per step: averages ref / ref+8 with ref+offset /
 * ref+offset+8, then averages those results with the current contents of
 * dest / dest+8 and writes them back. The callers in this file pass
 * offset = 1 or offset = stride; cpu selects the instruction via pavg_m2r.
 * NOTE(review): pointer updates and loop control are not visible here. */
172 static inline void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
173 int stride, int offset, int cpu)
176 movq_m2r (*ref, mm0);
177 movq_m2r (*(ref+8), mm1);
178 pavg_m2r (*(ref+offset), mm0);
179 pavg_m2r (*(ref+offset+8), mm1);
180 pavg_m2r (*dest, mm0);
181 pavg_m2r (*(dest+8), mm1);
183 movq_r2m (mm0, *dest);
184 movq_r2m (mm1, *(dest+8));
/* 0x01 replicated in each of the eight bytes. The 4-sample helpers in this
 * file apply it with pand to keep only the lowest bit of every byte in mm7. */
189 static mmx_t mask_one = {0x0101010101010101LL};
/* Interpolating copy called by the MC_put_xy8_* wrappers. Visible steps:
 * *ref and *(ref+1) are loaded into mm0/mm1, the same two addresses are
 * loaded again into mm2/mm3 (presumably after ref has advanced by stride),
 * mm7 is masked with mask_one and subtracted from mm0 with unsigned
 * saturation, and mm0 is stored to dest. The two "unroll" moves copy
 * mm6 -> mm7 and mm2 -> mm0, presumably to carry state into the next row.
 * NOTE(review): the averaging instructions, pointer updates and loop control
 * are not visible here; confirm the exact rounding against the complete
 * function before relying on this description. */
191 static inline void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
194 movq_m2r (*ref, mm0);
195 movq_m2r (*(ref+1), mm1);
202 movq_m2r (*ref, mm2);
205 movq_m2r (*(ref+1), mm3);
217 pand_m2r (mask_one, mm7);
219 psubusb_r2r (mm7, mm0);
222 movq_r2m (mm0, *dest);
225 movq_r2r (mm6, mm7); // unroll !
226 movq_r2r (mm2, mm0); // unroll !
/* Interpolating copy called by the MC_put_xy16_* wrappers, processed as two
 * 64-bit halves. For each half the four neighbours ref, ref+1, ref+stride
 * and ref+stride+1 are loaded (the second half uses the same addresses
 * shifted by 8), mm7 is masked with mask_one and subtracted from mm0 with
 * unsigned saturation, and mm0 is stored to dest (then dest+8).
 * NOTE(review): the averaging instructions that combine the four loads and
 * produce mm7, as well as pointer updates and loop control, are not visible
 * here -- confirm against the complete function. */
230 static inline void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
234 movq_m2r (*ref, mm0);
235 movq_m2r (*(ref+stride+1), mm1);
237 movq_m2r (*(ref+1), mm2);
239 movq_m2r (*(ref+stride), mm3);
248 pand_m2r (mask_one, mm7);
250 psubusb_r2r (mm7, mm0);
251 movq_r2m (mm0, *dest);
253 movq_m2r (*(ref+8), mm0);
254 movq_m2r (*(ref+stride+9), mm1);
256 movq_m2r (*(ref+9), mm2);
258 movq_m2r (*(ref+stride+8), mm3);
267 pand_m2r (mask_one, mm7);
269 psubusb_r2r (mm7, mm0);
271 movq_r2m (mm0, *(dest+8));
/* Interpolation plus destination blend called by the MC_avg_xy8_* wrappers.
 * Visible steps: the four neighbours ref, ref+1, ref+stride and ref+stride+1
 * are loaded, mm7 is masked with mask_one and subtracted from mm0 with
 * unsigned saturation, the current contents of dest are loaded into mm1
 * (presumably to be averaged with mm0), and mm0 is stored back to dest.
 * NOTE(review): the averaging instructions, pointer updates and loop control
 * are not visible here -- confirm against the complete function. */
276 static inline void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
280 movq_m2r (*ref, mm0);
281 movq_m2r (*(ref+stride+1), mm1);
283 movq_m2r (*(ref+1), mm2);
285 movq_m2r (*(ref+stride), mm3);
294 pand_m2r (mask_one, mm7);
296 psubusb_r2r (mm7, mm0);
297 movq_m2r (*dest, mm1);
300 movq_r2m (mm0, *dest);
/* Interpolation plus destination blend called by the MC_avg_xy16_* wrappers,
 * processed as two 64-bit halves. For each half the four neighbours ref,
 * ref+1, ref+stride and ref+stride+1 are loaded (shifted by 8 for the second
 * half), mm7 is masked with mask_one and subtracted from mm0 with unsigned
 * saturation, the current destination contents are loaded into mm1
 * (presumably to be averaged with mm0), and mm0 is stored back to dest
 * (then dest+8).
 * NOTE(review): the averaging instructions, pointer updates and loop control
 * are not visible here -- confirm against the complete function. */
305 static inline void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
309 movq_m2r (*ref, mm0);
310 movq_m2r (*(ref+stride+1), mm1);
312 movq_m2r (*(ref+1), mm2);
314 movq_m2r (*(ref+stride), mm3);
323 pand_m2r (mask_one, mm7);
325 psubusb_r2r (mm7, mm0);
326 movq_m2r (*dest, mm1);
328 movq_r2m (mm0, *dest);
330 movq_m2r (*(ref+8), mm0);
331 movq_m2r (*(ref+stride+9), mm1);
333 movq_m2r (*(ref+9), mm2);
335 movq_m2r (*(ref+stride+8), mm3);
344 pand_m2r (mask_one, mm7);
346 psubusb_r2r (mm7, mm0);
347 movq_m2r (*(dest+8), mm1);
350 movq_r2m (mm0, *(dest+8));
/* MMXEXT wrapper: forwards to MC_avg1_16 (which takes height first) with
 * cpu = CPU_MMXEXT. */
355 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
356 int stride, int height)
358 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_avg1_8 (which takes height first) with
 * cpu = CPU_MMXEXT. */
361 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
362 int stride, int height)
364 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_put1_16, which takes height first and has
 * no cpu argument. */
367 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
368 int stride, int height)
370 MC_put1_16 (height, dest, ref, stride);
/* MMXEXT wrapper: forwards to MC_put1_8, which takes height first and has
 * no cpu argument. */
373 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
374 int stride, int height)
376 MC_put1_8 (height, dest, ref, stride);
/* MMXEXT wrapper: forwards to MC_avg2_16 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_MMXEXT. */
379 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
380 int stride, int height)
382 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_avg2_8 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_MMXEXT. */
385 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
386 int stride, int height)
388 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_put2_16 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_MMXEXT. */
391 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
392 int stride, int height)
394 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_put2_8 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_MMXEXT. */
397 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
398 int stride, int height)
400 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_avg2_16 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_MMXEXT. */
403 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
404 int stride, int height)
406 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_avg2_8 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_MMXEXT. */
409 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
410 int stride, int height)
412 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_put2_16 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_MMXEXT. */
415 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
416 int stride, int height)
418 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to MC_put2_8 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_MMXEXT. */
421 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
422 int stride, int height)
424 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to the 4-sample helper MC_avg4_16 (which takes
 * height first) with cpu = CPU_MMXEXT. */
427 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
428 int stride, int height)
430 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to the 4-sample helper MC_avg4_8 (which takes
 * height first) with cpu = CPU_MMXEXT. */
433 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
434 int stride, int height)
436 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to the 4-sample helper MC_put4_16 (which takes
 * height first) with cpu = CPU_MMXEXT. */
439 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
440 int stride, int height)
442 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT wrapper: forwards to the 4-sample helper MC_put4_8 (which takes
 * height first) with cpu = CPU_MMXEXT. */
445 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
446 int stride, int height)
448 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* 3DNow! wrapper: forwards to MC_avg1_16 (which takes height first) with
 * cpu = CPU_3DNOW.
 * NOTE(review): none of the *_3dnow wrappers appears in the ppppf_motion
 * table as far as it is visible in this file -- confirm whether they are
 * referenced elsewhere or are dead code. */
452 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
453 int stride, int height)
455 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_avg1_8 (which takes height first) with
 * cpu = CPU_3DNOW. */
458 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
459 int stride, int height)
461 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_put1_16, which has no cpu argument, so the
 * call is the same as in the MMXEXT counterpart. */
464 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
465 int stride, int height)
467 MC_put1_16 (height, dest, ref, stride);
/* 3DNow! wrapper: forwards to MC_put1_8, which has no cpu argument, so the
 * call is the same as in the MMXEXT counterpart. */
470 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
471 int stride, int height)
473 MC_put1_8 (height, dest, ref, stride);
/* 3DNow! wrapper: forwards to MC_avg2_16 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_3DNOW. */
476 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
477 int stride, int height)
479 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_avg2_8 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_3DNOW. */
482 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
483 int stride, int height)
485 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_put2_16 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_3DNOW. */
488 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
489 int stride, int height)
491 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_put2_8 with offset = 1 (the sample at
 * ref+1 is the second interpolation input) and cpu = CPU_3DNOW. */
494 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
495 int stride, int height)
497 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_avg2_16 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_3DNOW. */
500 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
501 int stride, int height)
503 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_avg2_8 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_3DNOW. */
506 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
507 int stride, int height)
509 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_put2_16 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_3DNOW. */
512 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
513 int stride, int height)
515 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! wrapper: forwards to MC_put2_8 with offset = stride (the sample
 * one stride further on, presumably the next row, is the second
 * interpolation input) and cpu = CPU_3DNOW. */
518 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
519 int stride, int height)
521 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
524 /*****************************************************************************
525 * Functions exported as capabilities. They are declared as static so that
526 * we don't pollute the namespace too much.
527 *****************************************************************************/
/* Function table that Open() publishes through p_private. Per the inline
 * comments the first group holds the copying (put) functions and the second
 * the averaging (avg) ones; within each group the first row lists the 16
 * variants and the second row the 8 variants, in the order: no offset, x, y,
 * xy. Only the *_mmxext wrappers are listed here. */
528 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *, int, int ) =
530 /* Copying functions */
533 { MC_put_16_mmxext, MC_put_x16_mmxext, MC_put_y16_mmxext, MC_put_xy16_mmxext },
535 { MC_put_8_mmxext, MC_put_x8_mmxext, MC_put_y8_mmxext, MC_put_xy8_mmxext }
537 /* Averaging functions */
540 { MC_avg_16_mmxext, MC_avg_x16_mmxext, MC_avg_y16_mmxext, MC_avg_xy16_mmxext },
542 { MC_avg_8_mmxext, MC_avg_x8_mmxext, MC_avg_y8_mmxext, MC_avg_xy8_mmxext }
/* Module open callback: stores the address of the ppppf_motion table in
 * p_this->p_private so the caller can reach the motion compensation routines.
 * NOTE(review): the return statement is not visible here -- confirm the
 * returned status against the complete function. */
546 static int Open ( vlc_object_t *p_this )
548 p_this->p_private = ppppf_motion;