1 /*****************************************************************************
2 * motionmmxext.c : MMX EXT motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionmmxext.c,v 1.14 2001/12/09 17:01:36 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #define MODULE_NAME motionmmxext
26 #include "modules_inner.h"
28 /*****************************************************************************
30 *****************************************************************************/
33 #include <stdlib.h> /* malloc(), free() */
36 #include "common.h" /* boolean_t, byte_t */
45 #include "modules_export.h"
47 /*****************************************************************************
48 * Local and extern prototypes.
49 *****************************************************************************/
/* Forward declaration: motion_getfunctions is called by the module setup
 * statements that follow, ahead of its definition further down the file. */
50 static void motion_getfunctions( function_list_t * p_function_list );
52 /*****************************************************************************
53 * Build configuration tree.
54 *****************************************************************************/
/* NOTE(review): ADD_WINDOW, ADD_COMMENT and the MODULE_DEACTIVATE markers are
 * macros declared outside this file; whatever macros open and close the
 * configuration and initialisation sections are not visible in this excerpt. */
56 ADD_WINDOW( "Configuration for MMXEXT motion compensation module" )
57 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
/* Capability mask: MODULE_CAPABILITY_MOTION OR-ed onto the NULL capability,
 * followed by the module's long display name. */
61 p_module->i_capabilities = MODULE_CAPABILITY_NULL
62 | MODULE_CAPABILITY_MOTION;
63 p_module->psz_longname = "MMXEXT motion compensation module";
/* Let motion_getfunctions populate the motion entry of p_module->p_functions. */
67 motion_getfunctions( &p_module->p_functions->motion );
/* No statement is visible between the deactivate markers. */
70 MODULE_DEACTIVATE_START
71 MODULE_DEACTIVATE_STOP
73 /*****************************************************************************
74 * motion_Probe: probes the CPU and returns a score
75 *****************************************************************************/
/* NOTE(review): only the two conditions are visible in this excerpt; the
 * values returned on each path are not shown. p_data is not used by any
 * visible line. */
76 static int motion_Probe( probedata_t *p_data )
/* First condition: true when TestCPU does not report CPU_CAPABILITY_MMXEXT. */
78 if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
/* Second condition: MOTION_METHOD_VAR is tested against both the full module
 * name "motionmmxext" and the short form "mmxext" -- presumably an explicit
 * user selection of this module; confirm against TestMethod. */
83 if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" )
84 || TestMethod( MOTION_METHOD_VAR, "mmxext" ) )
92 /*****************************************************************************
93 * Motion compensation in MMXEXT (OK I know this does 3DNow too and it's ugly)
94 *****************************************************************************/
100 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* pavg_r2r and pavg_m2r select the byte-averaging wrapper for the CPU in use:
 * the pavgb form when cpu == CPU_MMXEXT, the pavgusb form on the other path.
 * Operand order is (src, dest).
 * `cpu` is not a macro parameter, so an identifier of that name must be in
 * scope wherever these macros are expanded.
 * NOTE(review): the lines between the two calls (presumably an `else` and a
 * statement wrapper) are not visible in this excerpt; the pavgb and pavgusb
 * wrappers themselves are defined outside this file. */
102 #define pavg_r2r(src,dest) \
104 if (cpu == CPU_MMXEXT) \
105 pavgb_r2r (src, dest); \
107 pavgusb_r2r (src, dest); \
110 #define pavg_m2r(src,dest) \
112 if (cpu == CPU_MMXEXT) \
113 pavgb_m2r (src, dest); \
115 pavgusb_m2r (src, dest); \
/* MC_put1_8: plain copy -- loads from *ref into mm0 and stores mm0 to *dest
 * (one movq, i.e. 64 bits, per visible load/store pair).
 * Callers in this file pass (height, dest, ref, stride).
 * NOTE(review): the end of the parameter list, the loop over height and the
 * pointer stepping are not visible in this excerpt. */
122 static __inline__ void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
126 movq_m2r (*ref, mm0);
127 movq_r2m (mm0, *dest);
/* MC_put1_16: plain copy of two adjacent quadwords -- ref and ref+8 are
 * loaded into mm0/mm1 and stored to dest and dest+8.
 * Callers in this file pass (height, dest, ref, stride).
 * NOTE(review): the end of the parameter list, the loop over height and the
 * pointer stepping are not visible in this excerpt. */
133 static __inline__ void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
137 movq_m2r (*ref, mm0);
138 movq_m2r (*(ref+8), mm1);
140 movq_r2m (mm0, *dest);
141 movq_r2m (mm1, *(dest+8));
/* MC_avg1_8: loads *ref into mm0, averages it with the current *dest through
 * pavg_m2r, and writes the result back to *dest.
 * Callers in this file pass (height, dest, ref, stride, cpu); pavg_m2r reads
 * that cpu value.
 * NOTE(review): the end of the parameter list, the loop over height and the
 * pointer stepping are not visible in this excerpt. */
146 static __inline__ void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
150 movq_m2r (*ref, mm0);
151 pavg_m2r (*dest, mm0);
153 movq_r2m (mm0, *dest);
/* MC_avg1_16: same operation as the 8-wide variant applied to two adjacent
 * quadwords -- ref/ref+8 are loaded, each is averaged with dest/dest+8 through
 * pavg_m2r, and both results are stored back.
 * Callers in this file pass (height, dest, ref, stride, cpu).
 * NOTE(review): the end of the parameter list, the loop over height and the
 * pointer stepping are not visible in this excerpt. */
158 static __inline__ void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
162 movq_m2r (*ref, mm0);
163 movq_m2r (*(ref+8), mm1);
164 pavg_m2r (*dest, mm0);
165 pavg_m2r (*(dest+8), mm1);
166 movq_r2m (mm0, *dest);
168 movq_r2m (mm1, *(dest+8));
/* MC_put2_8: two-source interpolation -- averages the quadword at ref with
 * the quadword at ref+offset and stores the result to *dest.
 * Callers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); cpu selects the averaging instruction inside pavg_m2r.
 * NOTE(review): the loop over height and the pointer stepping are not
 * visible in this excerpt. */
173 static __inline__ void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
174 int stride, int offset, int cpu)
177 movq_m2r (*ref, mm0);
178 pavg_m2r (*(ref+offset), mm0);
180 movq_r2m (mm0, *dest);
/* MC_put2_16: two-source interpolation over two adjacent quadwords --
 * ref and ref+8 are averaged with ref+offset and ref+offset+8 respectively,
 * then stored to dest and dest+8.
 * Callers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); cpu selects the averaging instruction inside pavg_m2r.
 * NOTE(review): the loop over height and the pointer stepping are not
 * visible in this excerpt. */
185 static __inline__ void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
186 int stride, int offset, int cpu)
189 movq_m2r (*ref, mm0);
190 movq_m2r (*(ref+8), mm1);
191 pavg_m2r (*(ref+offset), mm0);
192 pavg_m2r (*(ref+offset+8), mm1);
193 movq_r2m (mm0, *dest);
195 movq_r2m (mm1, *(dest+8));
/* MC_avg2_8: averages ref with ref+offset, then averages that result with
 * the existing *dest before storing it back to *dest.
 * Callers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); cpu selects the averaging instruction inside pavg_m2r.
 * NOTE(review): the loop over height and the pointer stepping are not
 * visible in this excerpt. */
200 static __inline__ void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
201 int stride, int offset, int cpu)
204 movq_m2r (*ref, mm0);
205 pavg_m2r (*(ref+offset), mm0);
206 pavg_m2r (*dest, mm0);
208 movq_r2m (mm0, *dest);
/* MC_avg2_16: for each of two adjacent quadwords, averages ref with
 * ref+offset, then averages that result with the existing dest contents and
 * stores it back (dest and dest+8).
 * Callers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); cpu selects the averaging instruction inside pavg_m2r.
 * NOTE(review): the loop over height and the pointer stepping are not
 * visible in this excerpt. */
213 static __inline__ void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
214 int stride, int offset, int cpu)
217 movq_m2r (*ref, mm0);
218 movq_m2r (*(ref+8), mm1);
219 pavg_m2r (*(ref+offset), mm0);
220 pavg_m2r (*(ref+offset+8), mm1);
221 pavg_m2r (*dest, mm0);
222 pavg_m2r (*(dest+8), mm1);
224 movq_r2m (mm0, *dest);
225 movq_r2m (mm1, *(dest+8));
/* 64-bit constant with 0x01 in each of its eight bytes; it is the pand
 * operand applied to mm7 in the four-source (put4 / avg4) routines. */
230 static mmx_t mask_one = {0x0101010101010101LL};
/* MC_put4_8: four-source interpolation, 8 bytes wide; callers in this file
 * pass (height, dest, ref, stride, cpu).
 * NOTE(review): most of the instruction sequence, the loop and the pointer
 * stepping are not visible in this excerpt. What can be read off the visible
 * lines:
 *   - ref and ref+1 are loaded into mm0/mm1, and again into mm2/mm3;
 *   - mm7 is masked with mask_one and subtracted from mm0 with psubusb
 *     (presumably a per-byte rounding correction) before mm0 is stored;
 *   - mm6 and mm2 are then copied into mm7 and mm0, which the original
 *     comments tag as candidates for unrolling. */
232 static __inline__ void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
235 movq_m2r (*ref, mm0);
236 movq_m2r (*(ref+1), mm1);
243 movq_m2r (*ref, mm2);
246 movq_m2r (*(ref+1), mm3);
258 pand_m2r (mask_one, mm7);
260 psubusb_r2r (mm7, mm0);
263 movq_r2m (mm0, *dest);
266 movq_r2r (mm6, mm7); // unroll !
267 movq_r2r (mm2, mm0); // unroll !
/* MC_put4_16: four-source interpolation, 16 bytes wide, handled as two
 * 8-byte halves; callers in this file pass (height, dest, ref, stride, cpu).
 * For each half the four sources are ref, ref+1, ref+stride and ref+stride+1
 * (offset by 8 for the second half). mm7 is masked with mask_one and
 * subtracted from mm0 with psubusb (presumably a per-byte rounding
 * correction) before mm0 is stored to dest / dest+8.
 * NOTE(review): the instructions that combine the four sources, the loop
 * and the pointer stepping are not visible in this excerpt. */
271 static __inline__ void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
275 movq_m2r (*ref, mm0);
276 movq_m2r (*(ref+stride+1), mm1);
278 movq_m2r (*(ref+1), mm2);
280 movq_m2r (*(ref+stride), mm3);
289 pand_m2r (mask_one, mm7);
291 psubusb_r2r (mm7, mm0);
292 movq_r2m (mm0, *dest);
294 movq_m2r (*(ref+8), mm0);
295 movq_m2r (*(ref+stride+9), mm1);
297 movq_m2r (*(ref+9), mm2);
299 movq_m2r (*(ref+stride+8), mm3);
308 pand_m2r (mask_one, mm7);
310 psubusb_r2r (mm7, mm0);
312 movq_r2m (mm0, *(dest+8));
/* MC_avg4_8: four-source interpolation, 8 bytes wide, blended with the
 * existing destination; callers in this file pass
 * (height, dest, ref, stride, cpu).
 * The four sources are ref, ref+1, ref+stride and ref+stride+1. mm7 is masked
 * with mask_one and subtracted from mm0 with psubusb (presumably a per-byte
 * rounding correction); *dest is then loaded into mm1 and mm0 is stored back
 * to *dest.
 * NOTE(review): the instructions that combine the sources and that merge mm1
 * into mm0, the loop and the pointer stepping are not visible in this
 * excerpt. */
317 static __inline__ void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
321 movq_m2r (*ref, mm0);
322 movq_m2r (*(ref+stride+1), mm1);
324 movq_m2r (*(ref+1), mm2);
326 movq_m2r (*(ref+stride), mm3);
335 pand_m2r (mask_one, mm7);
337 psubusb_r2r (mm7, mm0);
338 movq_m2r (*dest, mm1);
341 movq_r2m (mm0, *dest);
/* MC_avg4_16: four-source interpolation, 16 bytes wide, blended with the
 * existing destination and handled as two 8-byte halves; callers in this
 * file pass (height, dest, ref, stride, cpu).
 * For each half the sources are ref, ref+1, ref+stride and ref+stride+1
 * (offset by 8 for the second half). mm7 is masked with mask_one and
 * subtracted from mm0 with psubusb (presumably a per-byte rounding
 * correction); the current dest quadword is loaded into mm1 and mm0 is
 * stored back to dest / dest+8.
 * NOTE(review): the instructions that combine the sources and that merge mm1
 * into mm0, the loop and the pointer stepping are not visible in this
 * excerpt. */
346 static __inline__ void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
350 movq_m2r (*ref, mm0);
351 movq_m2r (*(ref+stride+1), mm1);
353 movq_m2r (*(ref+1), mm2);
355 movq_m2r (*(ref+stride), mm3);
364 pand_m2r (mask_one, mm7);
366 psubusb_r2r (mm7, mm0);
367 movq_m2r (*dest, mm1);
369 movq_r2m (mm0, *dest);
371 movq_m2r (*(ref+8), mm0);
372 movq_m2r (*(ref+stride+9), mm1);
374 movq_m2r (*(ref+9), mm2);
376 movq_m2r (*(ref+stride+8), mm3);
385 pand_m2r (mask_one, mm7);
387 psubusb_r2r (mm7, mm0);
388 movq_m2r (*(dest+8), mm1);
391 movq_r2m (mm0, *(dest+8));
/* MC_avg_16_mmxext: table entry point with the (dest, ref, stride, height)
 * signature; reorders the arguments and forwards to MC_avg1_16 with
 * CPU_MMXEXT as the cpu selector. */
396 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
397 int stride, int height)
399 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_8_mmxext: table entry point with the (dest, ref, stride, height)
 * signature; reorders the arguments and forwards to MC_avg1_8 with
 * CPU_MMXEXT as the cpu selector. */
402 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
403 int stride, int height)
405 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_16_mmxext: table entry point with the (dest, ref, stride, height)
 * signature; reorders the arguments and forwards to MC_put1_16, which takes
 * no cpu selector. */
408 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
409 int stride, int height)
411 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_mmxext: table entry point with the (dest, ref, stride, height)
 * signature; reorders the arguments and forwards to MC_put1_8, which takes
 * no cpu selector. */
414 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
415 int stride, int height)
417 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_mmxext: forwards to MC_avg2_16 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_MMXEXT as the cpu
 * selector. */
420 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
421 int stride, int height)
423 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_x8_mmxext: forwards to MC_avg2_8 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_MMXEXT as the cpu
 * selector. */
426 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
427 int stride, int height)
429 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x16_mmxext: forwards to MC_put2_16 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_MMXEXT as the cpu
 * selector. */
432 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
433 int stride, int height)
435 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x8_mmxext: forwards to MC_put2_8 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_MMXEXT as the cpu
 * selector. */
438 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
439 int stride, int height)
441 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_y16_mmxext: forwards to MC_avg2_16 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_MMXEXT as the
 * cpu selector. */
444 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
445 int stride, int height)
447 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_y8_mmxext: forwards to MC_avg2_8 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_MMXEXT as the
 * cpu selector. */
450 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
451 int stride, int height)
453 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y16_mmxext: forwards to MC_put2_16 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_MMXEXT as the
 * cpu selector. */
456 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
457 int stride, int height)
459 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y8_mmxext: forwards to MC_put2_8 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_MMXEXT as the
 * cpu selector. */
462 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
463 int stride, int height)
465 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_xy16_mmxext: forwards to the four-source routine MC_avg4_16 with
 * CPU_MMXEXT as the cpu selector; arguments are reordered to
 * (height, dest, ref, stride). */
468 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
469 int stride, int height)
471 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_xy8_mmxext: forwards to the four-source routine MC_avg4_8 with
 * CPU_MMXEXT as the cpu selector; arguments are reordered to
 * (height, dest, ref, stride). */
474 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
475 int stride, int height)
477 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy16_mmxext: forwards to the four-source routine MC_put4_16 with
 * CPU_MMXEXT as the cpu selector; arguments are reordered to
 * (height, dest, ref, stride). */
480 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
481 int stride, int height)
483 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy8_mmxext: forwards to the four-source routine MC_put4_8 with
 * CPU_MMXEXT as the cpu selector; arguments are reordered to
 * (height, dest, ref, stride). */
486 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
487 int stride, int height)
489 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_16_3dnow: same forwarding as the MMXEXT variant, but passes
 * CPU_3DNOW to MC_avg1_16 so the pavg macros take the pavgusb path.
 * NOTE(review): not referenced by the function table visible in this file
 * excerpt. */
493 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
494 int stride, int height)
496 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_avg_8_3dnow: same forwarding as the MMXEXT variant, but passes
 * CPU_3DNOW to MC_avg1_8 so the pavg macros take the pavgusb path. */
499 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
500 int stride, int height)
502 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_16_3dnow: forwards to MC_put1_16 with the arguments reordered to
 * (height, dest, ref, stride). No cpu selector is passed, so the call is
 * identical to the one made by the MMXEXT variant. */
505 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
506 int stride, int height)
508 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_3dnow: forwards to MC_put1_8 with the arguments reordered to
 * (height, dest, ref, stride). No cpu selector is passed, so the call is
 * identical to the one made by the MMXEXT variant. */
511 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
512 int stride, int height)
514 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_3dnow: forwards to MC_avg2_16 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_3DNOW as the cpu
 * selector. */
517 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
518 int stride, int height)
520 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_x8_3dnow: forwards to MC_avg2_8 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_3DNOW as the cpu
 * selector. */
523 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
524 int stride, int height)
526 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x16_3dnow: forwards to MC_put2_16 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_3DNOW as the cpu
 * selector. */
529 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
530 int stride, int height)
532 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x8_3dnow: forwards to MC_put2_8 with offset 1 (second source is
 * ref+1 -- presumably the horizontal neighbour) and CPU_3DNOW as the cpu
 * selector. */
535 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
536 int stride, int height)
538 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_y16_3dnow: forwards to MC_avg2_16 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_3DNOW as the
 * cpu selector. */
541 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
542 int stride, int height)
544 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_avg_y8_3dnow: forwards to MC_avg2_8 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_3DNOW as the
 * cpu selector. */
547 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
548 int stride, int height)
550 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y16_3dnow: forwards to MC_put2_16 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_3DNOW as the
 * cpu selector. */
553 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
554 int stride, int height)
556 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y8_3dnow: forwards to MC_put2_8 with offset = stride (second
 * source is ref+stride -- presumably the row below) and CPU_3DNOW as the
 * cpu selector. */
559 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
560 int stride, int height)
562 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
565 /*****************************************************************************
566 * Functions exported as capabilities. They are declared as static so that
567 * we don't pollute the namespace too much.
568 *****************************************************************************/
/* motion_getfunctions: installs motion_Probe as the probe callback of
 * p_function_list and copies the local function table into its motion
 * function set.
 * NOTE(review): the braces of the initializer, the tail of the function
 * pointer type and the end of the function are not visible in this excerpt. */
569 static void motion_getfunctions( function_list_t * p_function_list )
/* Table dimensions are [2][2][4]. The visible rows are, in order: put/16,
 * put/8, avg/16, avg/8; each row lists the plain, x, y and xy variants.
 * Only the _mmxext entry points appear in the table. */
571 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
575 /* Copying functions */
578 MC_put_16_mmxext, MC_put_x16_mmxext, MC_put_y16_mmxext, MC_put_xy16_mmxext
582 MC_put_8_mmxext, MC_put_x8_mmxext, MC_put_y8_mmxext, MC_put_xy8_mmxext
586 /* Averaging functions */
589 MC_avg_16_mmxext, MC_avg_x16_mmxext, MC_avg_y16_mmxext, MC_avg_xy16_mmxext
593 MC_avg_8_mmxext, MC_avg_x8_mmxext, MC_avg_y8_mmxext, MC_avg_xy8_mmxext
598 p_function_list->pf_probe = motion_Probe;
600 #define list p_function_list->functions.motion
/* 16 == 2 * 2 * 4, the number of entries in ppppf_motion.
 * NOTE(review): the byte count is written as sizeof( void * ) * 16 although
 * the entries are function pointers; sizeof( ppppf_motion ) would follow the
 * table's real size and element type -- confirm before changing. */
601 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );