1 /*****************************************************************************
2 * motion3dnow.c : 3DNow! motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motion3dnow.c,v 1.2 2001/09/05 16:07:49 massiot Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #define MODULE_NAME motion3dnow
26 #include "modules_inner.h"
28 /*****************************************************************************
30 *****************************************************************************/
33 #include <stdlib.h> /* malloc(), free() */
36 #include "common.h" /* boolean_t, byte_t */
44 #include "modules_export.h"
46 /*****************************************************************************
47 * Local and extern prototypes.
48 *****************************************************************************/
49 static void motion_getfunctions( function_list_t * p_function_list );
51 /*****************************************************************************
52 * Build configuration tree.
53 *****************************************************************************/
/* Configuration entries for this module: a window title and one comment. */
55 ADD_WINDOW( "Configuration for 3DNow! motion compensation module" )
56 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
/* Capability flags (MODULE_CAPABILITY_MOTION) and the module's long name. */
60 p_module->i_capabilities = MODULE_CAPABILITY_NULL
61 | MODULE_CAPABILITY_MOTION;
62 p_module->psz_longname = "3DNow! motion compensation module";
/* Fill in the motion member of the module's function list. */
66 motion_getfunctions( &p_module->p_functions->motion );
/* The deactivation START/STOP pair is empty: nothing is done there. */
69 MODULE_DEACTIVATE_START
70 MODULE_DEACTIVATE_STOP
72 /*****************************************************************************
73 * motion_Probe: probes the CPU and returns a score
74 *****************************************************************************/
/* Probe callback installed as pf_probe by motion_getfunctions().
 * Two conditions are tested: whether the CPU reports CPU_CAPABILITY_3DNOW,
 * and whether the motion method variable names this module.
 * NOTE(review): the values returned for each case are not shown here --
 * confirm the actual scores against the full function body. */
75 static int motion_Probe( probedata_t *p_data )
/* First check: the branch is taken when 3DNow! is NOT available. */
77 if( !TestCPU( CPU_CAPABILITY_3DNOW ) )
/* Second check: the method variable matches either accepted spelling. */
82 if( TestMethod( MOTION_METHOD_VAR, "motion3dnow" )
83 || TestMethod( MOTION_METHOD_VAR, "3dnow" ) )
91 /*****************************************************************************
92 * Motion compensation in 3DNow (OK I know this does MMXEXT too and it's ugly)
93 *****************************************************************************/
99 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* Register-to-register average, dispatched on `cpu`: pavgb_r2r is used when
 * cpu == CPU_MMXEXT, with pavgusb_r2r as the alternative (the macros are
 * named after the PAVGB and PAVGUSB instructions).  The macro reads a
 * variable named `cpu` from the scope in which it is expanded. */
101 #define pavg_r2r(src,dest) \
103 if (cpu == CPU_MMXEXT) \
104 pavgb_r2r (src, dest); \
106 pavgusb_r2r (src, dest); \
/* Memory-to-register counterpart of pavg_r2r: pavgb_m2r is used when
 * cpu == CPU_MMXEXT, with pavgusb_m2r as the alternative.  Like pavg_r2r it
 * depends on a variable named `cpu` being visible at the expansion site. */
109 #define pavg_m2r(src,dest) \
111 if (cpu == CPU_MMXEXT) \
112 pavgb_m2r (src, dest); \
114 pavgusb_m2r (src, dest); \
/* Plain copy helper: one quadword is loaded from *ref into mm0 and stored
 * to *dest.  No averaging macro is involved, which is why callers pass no
 * cpu argument.
 * NOTE(review): `height` is presumably the number of rows processed, with
 * ref/dest advanced between rows -- confirm against the full loop. */
121 static __inline__ void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
125 movq_m2r (*ref, mm0);
126 movq_r2m (mm0, *dest);
/* Plain copy helper, two quadwords wide: *ref and *(ref+8) are loaded into
 * mm0/mm1 and stored to *dest and *(dest+8).  No averaging macro is used.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
132 static __inline__ void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
136 movq_m2r (*ref, mm0);
137 movq_m2r (*(ref+8), mm1);
139 movq_r2m (mm0, *dest);
140 movq_r2m (mm1, *(dest+8));
/* Averaging helper, one quadword wide: loads *ref, averages it with the
 * current contents of *dest via pavg_m2r, and writes the result back to
 * *dest.  pavg_m2r selects the instruction from the `cpu` value.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
145 static __inline__ void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
149 movq_m2r (*ref, mm0);
150 pavg_m2r (*dest, mm0);
152 movq_r2m (mm0, *dest);
/* Averaging helper, two quadwords wide: *ref and *(ref+8) are loaded into
 * mm0/mm1, each is averaged with the matching quadword already in dest, and
 * both results are stored back to *dest and *(dest+8).
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
157 static __inline__ void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
161 movq_m2r (*ref, mm0);
162 movq_m2r (*(ref+8), mm1);
163 pavg_m2r (*dest, mm0);
164 pavg_m2r (*(dest+8), mm1);
165 movq_r2m (mm0, *dest);
167 movq_r2m (mm1, *(dest+8));
/* Two-source copy helper, one quadword wide: averages *ref with
 * *(ref+offset) and stores the result to *dest.  Callers in this file pass
 * either 1 or `stride` as `offset`.  `cpu` is consumed by pavg_m2r.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
172 static __inline__ void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
173 int stride, int offset, int cpu)
176 movq_m2r (*ref, mm0);
177 pavg_m2r (*(ref+offset), mm0);
179 movq_r2m (mm0, *dest);
/* Two-source copy helper, two quadwords wide: each of *ref and *(ref+8) is
 * averaged with the quadword `offset` elements further on, and the results
 * are stored to *dest and *(dest+8).  Callers in this file pass either 1 or
 * `stride` as `offset`.  `cpu` is consumed by pavg_m2r.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
184 static __inline__ void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
185 int stride, int offset, int cpu)
188 movq_m2r (*ref, mm0);
189 movq_m2r (*(ref+8), mm1);
190 pavg_m2r (*(ref+offset), mm0);
191 pavg_m2r (*(ref+offset+8), mm1);
192 movq_r2m (mm0, *dest);
194 movq_r2m (mm1, *(dest+8));
/* Two-source averaging helper, one quadword wide: averages *ref with
 * *(ref+offset), then averages that result with the current *dest and
 * stores it back to *dest.  `cpu` is consumed by pavg_m2r.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
199 static __inline__ void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
200 int stride, int offset, int cpu)
203 movq_m2r (*ref, mm0);
204 pavg_m2r (*(ref+offset), mm0);
/* Second averaging step folds in what is already stored in dest. */
205 pavg_m2r (*dest, mm0);
207 movq_r2m (mm0, *dest);
/* Two-source averaging helper, two quadwords wide: each of *ref and
 * *(ref+8) is averaged with the quadword `offset` elements further on, the
 * result is averaged with the matching quadword already in dest, and both
 * are stored back.  `cpu` is consumed by pavg_m2r.
 * NOTE(review): `height` is presumably the row count -- confirm against the
 * full loop. */
212 static __inline__ void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
213 int stride, int offset, int cpu)
216 movq_m2r (*ref, mm0);
217 movq_m2r (*(ref+8), mm1);
218 pavg_m2r (*(ref+offset), mm0);
219 pavg_m2r (*(ref+offset+8), mm1);
/* Second averaging step folds in what is already stored in dest. */
220 pavg_m2r (*dest, mm0);
221 pavg_m2r (*(dest+8), mm1);
223 movq_r2m (mm0, *dest);
224 movq_r2m (mm1, *(dest+8));
/* Constant with 0x01 in each of its eight bytes.  The four-source helpers
 * AND it into mm7 (keeping only the lowest bit of every byte) before mm7 is
 * subtracted from mm0 with psubusb. */
229 static mmx_t mask_one = {0x0101010101010101LL};
/* Four-source copy helper, one quadword wide.  The visible steps load *ref
 * and *(ref+1) twice (into mm0/mm1, then into mm2/mm3), mask mm7 with
 * mask_one, subtract mm7 from mm0 with psubusb and store mm0 to *dest.
 * NOTE(review): the second pair of loads presumably reads the following row
 * after ref has been advanced, and mm7 presumably holds a per-byte
 * correction term -- the lines that establish this are not shown here;
 * confirm against the full body. */
231 static __inline__ void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
234 movq_m2r (*ref, mm0);
235 movq_m2r (*(ref+1), mm1);
242 movq_m2r (*ref, mm2);
245 movq_m2r (*(ref+1), mm3);
/* Keep only the low bit of each byte of mm7, then subtract it from mm0. */
257 pand_m2r (mask_one, mm7);
259 psubusb_r2r (mm7, mm0);
262 movq_r2m (mm0, *dest);
/* Carry mm6 and mm2 over into mm7 and mm0 for the next pass. */
265 movq_r2r (mm6, mm7); // unroll !
266 movq_r2r (mm2, mm0); // unroll !
/* Four-source copy helper, two quadwords wide.  For each half (offsets 0
 * and 8) the visible code loads four source quadwords -- ref, ref+stride+1,
 * ref+1 and ref+stride -- masks mm7 with mask_one, subtracts it from mm0
 * with psubusb and stores mm0 to the matching half of dest.
 * NOTE(review): the instructions that combine the four sources and compute
 * mm7 are not shown here -- confirm against the full body. */
270 static __inline__ void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: sources at ref, ref+stride+1, ref+1, ref+stride. */
274 movq_m2r (*ref, mm0);
275 movq_m2r (*(ref+stride+1), mm1);
277 movq_m2r (*(ref+1), mm2);
279 movq_m2r (*(ref+stride), mm3);
288 pand_m2r (mask_one, mm7);
290 psubusb_r2r (mm7, mm0);
291 movq_r2m (mm0, *dest);
/* Second half: same pattern, 8 elements further on. */
293 movq_m2r (*(ref+8), mm0);
294 movq_m2r (*(ref+stride+9), mm1);
296 movq_m2r (*(ref+9), mm2);
298 movq_m2r (*(ref+stride+8), mm3);
307 pand_m2r (mask_one, mm7);
309 psubusb_r2r (mm7, mm0);
311 movq_r2m (mm0, *(dest+8));
/* Four-source averaging helper, one quadword wide.  The visible code loads
 * ref, ref+stride+1, ref+1 and ref+stride, masks mm7 with mask_one,
 * subtracts it from mm0 with psubusb, loads the current *dest into mm1 and
 * finally stores mm0 to *dest.
 * NOTE(review): the step that merges mm1 (the old dest) into mm0 is not
 * shown here -- confirm against the full body. */
316 static __inline__ void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
320 movq_m2r (*ref, mm0);
321 movq_m2r (*(ref+stride+1), mm1);
323 movq_m2r (*(ref+1), mm2);
325 movq_m2r (*(ref+stride), mm3);
334 pand_m2r (mask_one, mm7);
336 psubusb_r2r (mm7, mm0);
/* Fetch what is already stored in dest. */
337 movq_m2r (*dest, mm1);
340 movq_r2m (mm0, *dest);
/* Four-source averaging helper, two quadwords wide.  Each half (offsets 0
 * and 8) follows the same visible pattern: load ref, ref+stride+1, ref+1
 * and ref+stride, mask mm7 with mask_one, subtract it from mm0 with
 * psubusb, load the current dest quadword into mm1, then store mm0 to dest.
 * NOTE(review): the steps that combine the sources and merge the old dest
 * value are not shown here -- confirm against the full body. */
345 static __inline__ void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half. */
349 movq_m2r (*ref, mm0);
350 movq_m2r (*(ref+stride+1), mm1);
352 movq_m2r (*(ref+1), mm2);
354 movq_m2r (*(ref+stride), mm3);
363 pand_m2r (mask_one, mm7);
365 psubusb_r2r (mm7, mm0);
366 movq_m2r (*dest, mm1);
368 movq_r2m (mm0, *dest);
/* Second half, 8 elements further on in both ref and dest. */
370 movq_m2r (*(ref+8), mm0);
371 movq_m2r (*(ref+stride+9), mm1);
373 movq_m2r (*(ref+9), mm2);
375 movq_m2r (*(ref+stride+8), mm3);
384 pand_m2r (mask_one, mm7);
386 psubusb_r2r (mm7, mm0);
387 movq_m2r (*(dest+8), mm1);
390 movq_r2m (mm0, *(dest+8));
/* MMXEXT averaging entry point (no source offset): forwards to MC_avg1_16
 * with cpu = CPU_MMXEXT.  Arguments are reordered so height comes first. */
395 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
396 int stride, int height)
398 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT averaging entry point (no source offset): forwards to MC_avg1_8
 * with cpu = CPU_MMXEXT.  Arguments are reordered so height comes first. */
401 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
402 int stride, int height)
404 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT copy entry point: forwards to MC_put1_16.  No cpu value is passed;
 * the call is the same as the one made by MC_put_16_3dnow. */
407 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
408 int stride, int height)
410 MC_put1_16 (height, dest, ref, stride);
/* MMXEXT copy entry point: forwards to MC_put1_8.  No cpu value is passed;
 * the call is the same as the one made by MC_put_8_3dnow. */
413 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
414 int stride, int height)
416 MC_put1_8 (height, dest, ref, stride);
/* MMXEXT averaging entry point with offset 1: forwards to MC_avg2_16, so
 * each source quadword is paired with the one starting at ref+1. */
419 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
420 int stride, int height)
422 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT averaging entry point with offset 1: forwards to MC_avg2_8, so the
 * source quadword is paired with the one starting at ref+1. */
425 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
426 int stride, int height)
428 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT copy entry point with offset 1: forwards to MC_put2_16, so each
 * source quadword is paired with the one starting at ref+1. */
431 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
432 int stride, int height)
434 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT copy entry point with offset 1: forwards to MC_put2_8, so the
 * source quadword is paired with the one starting at ref+1. */
437 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
438 int stride, int height)
440 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT averaging entry point with offset = stride: forwards to
 * MC_avg2_16, so each source quadword is paired with the one at ref+stride. */
443 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
444 int stride, int height)
446 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT averaging entry point with offset = stride: forwards to
 * MC_avg2_8, so the source quadword is paired with the one at ref+stride. */
449 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
450 int stride, int height)
452 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT copy entry point with offset = stride: forwards to MC_put2_16, so
 * each source quadword is paired with the one at ref+stride. */
455 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
456 int stride, int height)
458 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT copy entry point with offset = stride: forwards to MC_put2_8, so
 * the source quadword is paired with the one at ref+stride. */
461 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
462 int stride, int height)
464 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT four-source averaging entry point: forwards to MC_avg4_16 with
 * cpu = CPU_MMXEXT. */
467 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
468 int stride, int height)
470 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT four-source averaging entry point: forwards to MC_avg4_8 with
 * cpu = CPU_MMXEXT. */
473 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
474 int stride, int height)
476 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT four-source copy entry point: forwards to MC_put4_16 with
 * cpu = CPU_MMXEXT. */
479 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
480 int stride, int height)
482 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT four-source copy entry point: forwards to MC_put4_8 with
 * cpu = CPU_MMXEXT. */
485 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
486 int stride, int height)
488 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* 3DNow! averaging entry point (no source offset): forwards to MC_avg1_16
 * with cpu = CPU_3DNOW.  Arguments are reordered so height comes first. */
492 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
493 int stride, int height)
495 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! averaging entry point (no source offset): forwards to MC_avg1_8
 * with cpu = CPU_3DNOW.  Arguments are reordered so height comes first. */
498 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
499 int stride, int height)
501 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! copy entry point: forwards to MC_put1_16.  No cpu value is passed;
 * the call is the same as the one made by MC_put_16_mmxext. */
504 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
505 int stride, int height)
507 MC_put1_16 (height, dest, ref, stride);
/* 3DNow! copy entry point: forwards to MC_put1_8.  No cpu value is passed;
 * the call is the same as the one made by MC_put_8_mmxext. */
510 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
511 int stride, int height)
513 MC_put1_8 (height, dest, ref, stride);
/* 3DNow! averaging entry point with offset 1: forwards to MC_avg2_16, so
 * each source quadword is paired with the one starting at ref+1. */
516 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
517 int stride, int height)
519 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! averaging entry point with offset 1: forwards to MC_avg2_8, so the
 * source quadword is paired with the one starting at ref+1. */
522 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
523 int stride, int height)
525 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! copy entry point with offset 1: forwards to MC_put2_16, so each
 * source quadword is paired with the one starting at ref+1. */
528 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
529 int stride, int height)
531 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! copy entry point with offset 1: forwards to MC_put2_8, so the
 * source quadword is paired with the one starting at ref+1. */
534 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
535 int stride, int height)
537 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! averaging entry point with offset = stride: forwards to
 * MC_avg2_16, so each source quadword is paired with the one at ref+stride. */
540 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
541 int stride, int height)
543 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! averaging entry point with offset = stride: forwards to
 * MC_avg2_8, so the source quadword is paired with the one at ref+stride. */
546 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
547 int stride, int height)
549 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! copy entry point with offset = stride: forwards to MC_put2_16, so
 * each source quadword is paired with the one at ref+stride. */
552 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
553 int stride, int height)
555 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! copy entry point with offset = stride: forwards to MC_put2_8, so
 * the source quadword is paired with the one at ref+stride. */
558 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
559 int stride, int height)
561 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! four-source averaging entry point: forwards to MC_avg4_16 with
 * cpu = CPU_3DNOW. */
564 static void MC_avg_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
565 int stride, int height)
567 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! four-source averaging entry point: forwards to MC_avg4_8 with
 * cpu = CPU_3DNOW. */
570 static void MC_avg_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
571 int stride, int height)
573 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! four-source copy entry point: forwards to MC_put4_16 with
 * cpu = CPU_3DNOW. */
576 static void MC_put_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
577 int stride, int height)
579 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! four-source copy entry point: forwards to MC_put4_8 with
 * cpu = CPU_3DNOW. */
582 static void MC_put_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
583 int stride, int height)
585 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
588 /*****************************************************************************
589 * Functions exported as capabilities. They are declared as static so that
590 * we don't pollute the namespace too much.
591 *****************************************************************************/
/* Populates *p_function_list for this module: installs motion_Probe as the
 * probe callback and copies a local [2][2][4] table of motion compensation
 * routines into functions.motion.ppppf_motion.
 *
 * Table layout as initialised below: the copying ("put") routines come
 * first, then the averaging ("avg") ones; within each group the 16 variants
 * precede the 8 variants; each row lists the plain, x, y and xy routine.
 *
 * NOTE(review): only the *_3dnow wrappers appear in the visible initialiser;
 * the *_mmxext wrappers defined in this file are not referenced here --
 * confirm whether that is intentional. */
592 static void motion_getfunctions( function_list_t * p_function_list )
594 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
598 /* Copying functions */
601 MC_put_16_3dnow, MC_put_x16_3dnow, MC_put_y16_3dnow, MC_put_xy16_3dnow
605 MC_put_8_3dnow, MC_put_x8_3dnow, MC_put_y8_3dnow, MC_put_xy8_3dnow
609 /* Averaging functions */
612 MC_avg_16_3dnow, MC_avg_x16_3dnow, MC_avg_y16_3dnow, MC_avg_xy16_3dnow
616 MC_avg_8_3dnow, MC_avg_x8_3dnow, MC_avg_y8_3dnow, MC_avg_xy8_3dnow
621 p_function_list->pf_probe = motion_Probe;
/* `list` is shorthand for the motion member of the function list. */
623 #define list p_function_list->functions.motion
/* 16 = 2 * 2 * 4 table entries.
 * NOTE(review): the byte count uses sizeof( void * ) although the entries
 * are function pointers; sizeof( ppppf_motion ) would state the intent
 * directly and stay correct if the table shape changes.  memcpy also needs
 * <string.h>, whose inclusion is not visible here -- confirm. */
624 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );