1 /*****************************************************************************
2 * motionmmxext.c : MMX EXT motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionmmxext.c,v 1.13 2001/11/28 15:08:05 massiot Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #define MODULE_NAME motionmmxext
26 #include "modules_inner.h"
28 /*****************************************************************************
30 *****************************************************************************/
33 #include <stdlib.h> /* malloc(), free() */
37 #include "common.h" /* boolean_t, byte_t */
46 #include "modules_export.h"
48 /*****************************************************************************
49 * Local and extern prototypes.
50 *****************************************************************************/
/* Forward declaration: used by the module setup code before the definition
 * that appears at the end of this file. */
51 static void motion_getfunctions( function_list_t * p_function_list );
53 /*****************************************************************************
54 * Build configuration tree.
55 *****************************************************************************/
/* Configuration window: a title plus a placeholder comment. No tunable
 * options are declared in the visible lines.
 * NOTE(review): the enclosing start/stop macros are not visible in this excerpt. */
57 ADD_WINDOW( "Configuration for MMXEXT motion compensation module" )
58 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
/* Advertise the motion capability and set the human-readable module name. */
62 p_module->i_capabilities = MODULE_CAPABILITY_NULL
63 | MODULE_CAPABILITY_MOTION;
64 p_module->psz_longname = "MMXEXT motion compensation module";
/* Let motion_getfunctions() fill in the module's motion function entry. */
68 motion_getfunctions( &p_module->p_functions->motion );
/* Deactivation section is empty: nothing is listed between START and STOP. */
71 MODULE_DEACTIVATE_START
72 MODULE_DEACTIVATE_STOP
74 /*****************************************************************************
75 * motion_Probe: probes the CPU and returns a score
76 *****************************************************************************/
/* motion_Probe: returns an int score for this module.
 * Two checks are visible: an MMXEXT CPU capability test, and a test of
 * MOTION_METHOD_VAR against this module's two names.
 * NOTE(review): the return statements are not visible in this excerpt, so the
 * actual score values cannot be documented from here. */
77 static int motion_Probe( probedata_t *p_data )
/* Branch taken when TestCPU() reports that MMXEXT is NOT available. */
79 if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
/* Accept either the full name "motionmmxext" or the short name "mmxext";
 * presumably this is how a user explicitly requests the module -- confirm. */
84 if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" )
85 || TestMethod( MOTION_METHOD_VAR, "mmxext" ) )
93 /*****************************************************************************
94 * Motion compensation in MMXEXT (OK I know this does 3DNow too and it's ugly)
95 *****************************************************************************/
101 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* pavg_r2r(src,dest): register-to-register variant of the CPU adaptation layer.
 * Uses pavgb_r2r when `cpu == CPU_MMXEXT`; pavgusb_r2r is the other visible
 * call (presumably the else branch, i.e. the 3DNow path).
 * `cpu` is NOT a macro parameter: it must be a variable in scope at the
 * expansion site. */
103 #define pavg_r2r(src,dest) \
105 if (cpu == CPU_MMXEXT) \
106 pavgb_r2r (src, dest); \
108 pavgusb_r2r (src, dest); \
/* pavg_m2r(src,dest): memory-to-register variant of the CPU adaptation layer.
 * Uses pavgb_m2r when `cpu == CPU_MMXEXT`; pavgusb_m2r is the other visible
 * call (presumably the else branch, i.e. the 3DNow path).
 * `cpu` is NOT a macro parameter: it must be a variable in scope at the
 * expansion site. */
111 #define pavg_m2r(src,dest) \
113 if (cpu == CPU_MMXEXT) \
114 pavgb_m2r (src, dest); \
116 pavgusb_m2r (src, dest); \
/* MC_put1_8: plain copy helper.
 * Visible body: load *ref into mm0, then store mm0 to *dest. The movq_*
 * helpers presumably wrap the 64-bit MMX movq, i.e. 8 bytes per step.
 * NOTE(review): the end of the signature and the row loop are not visible
 * in this excerpt. */
123 static __inline__ void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
127 movq_m2r (*ref, mm0);
128 movq_r2m (mm0, *dest);
/* MC_put1_16: plain copy helper, two registers wide.
 * Visible body: load *ref and *(ref+8) into mm0/mm1, then store them to
 * *dest and *(dest+8).
 * NOTE(review): the end of the signature and the row loop are not visible
 * in this excerpt. */
134 static __inline__ void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
138 movq_m2r (*ref, mm0);
139 movq_m2r (*(ref+8), mm1);
141 movq_r2m (mm0, *dest);
142 movq_r2m (mm1, *(dest+8));
/* MC_avg1_8: blend helper.
 * Visible body: load *ref into mm0, combine it with the current *dest through
 * pavg_m2r (pavgb on CPU_MMXEXT, per the macro), then write mm0 back to *dest.
 * NOTE(review): the end of the signature (which must provide `cpu` for the
 * macro) and the row loop are not visible in this excerpt. */
147 static __inline__ void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
151 movq_m2r (*ref, mm0);
152 pavg_m2r (*dest, mm0);
154 movq_r2m (mm0, *dest);
/* MC_avg1_16: blend helper, two registers wide.
 * Visible body: load *ref and *(ref+8), combine each with the matching half
 * of *dest through pavg_m2r, then write both halves back to *dest.
 * NOTE(review): the end of the signature (which must provide `cpu` for the
 * macro) and the row loop are not visible in this excerpt. */
159 static __inline__ void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
163 movq_m2r (*ref, mm0);
164 movq_m2r (*(ref+8), mm1);
165 pavg_m2r (*dest, mm0);
166 pavg_m2r (*(dest+8), mm1);
167 movq_r2m (mm0, *dest);
169 movq_r2m (mm1, *(dest+8));
/* MC_put2_8: two-sample interpolation, result written to dest.
 * Visible body: load *ref into mm0, combine it with *(ref+offset) through
 * pavg_m2r, then store mm0 to *dest.
 * Callers in this file pass offset = 1 or offset = stride.
 * `cpu` selects the instruction used inside pavg_m2r.
 * NOTE(review): the row loop is not visible in this excerpt. */
174 static __inline__ void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
175 int stride, int offset, int cpu)
178 movq_m2r (*ref, mm0);
179 pavg_m2r (*(ref+offset), mm0);
181 movq_r2m (mm0, *dest);
/* MC_put2_16: two-sample interpolation, two registers wide, written to dest.
 * Visible body: load *ref and *(ref+8), combine each with the sample `offset`
 * further on through pavg_m2r, then store to *dest and *(dest+8).
 * Callers in this file pass offset = 1 or offset = stride.
 * NOTE(review): the row loop is not visible in this excerpt. */
186 static __inline__ void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
187 int stride, int offset, int cpu)
190 movq_m2r (*ref, mm0);
191 movq_m2r (*(ref+8), mm1);
192 pavg_m2r (*(ref+offset), mm0);
193 pavg_m2r (*(ref+offset+8), mm1);
194 movq_r2m (mm0, *dest);
196 movq_r2m (mm1, *(dest+8));
/* MC_avg2_8: two-sample interpolation blended into dest.
 * Visible body: load *ref, combine with *(ref+offset) through pavg_m2r,
 * combine that result with the current *dest through a second pavg_m2r,
 * then write mm0 back to *dest.
 * NOTE(review): the row loop is not visible in this excerpt. */
201 static __inline__ void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
202 int stride, int offset, int cpu)
205 movq_m2r (*ref, mm0);
206 pavg_m2r (*(ref+offset), mm0);
207 pavg_m2r (*dest, mm0);
209 movq_r2m (mm0, *dest);
/* MC_avg2_16: two-sample interpolation blended into dest, two registers wide.
 * Visible body: for each half (ref / ref+8), combine the sample with the one
 * `offset` further on, then with the matching half of *dest, and write the
 * result back to *dest / *(dest+8).
 * NOTE(review): the row loop is not visible in this excerpt. */
214 static __inline__ void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
215 int stride, int offset, int cpu)
218 movq_m2r (*ref, mm0);
219 movq_m2r (*(ref+8), mm1);
220 pavg_m2r (*(ref+offset), mm0);
221 pavg_m2r (*(ref+offset+8), mm1);
222 pavg_m2r (*dest, mm0);
223 pavg_m2r (*(dest+8), mm1);
225 movq_r2m (mm0, *dest);
226 movq_r2m (mm1, *(dest+8));
/* Constant with 0x01 in every byte of a 64-bit value. The MC_*4_* helpers
 * apply it with pand_m2r to mm7 right before a psubusb on mm0; presumably a
 * per-byte rounding correction -- confirm against the full source. */
231 static mmx_t mask_one = {0x0101010101010101LL};
/* MC_put4_8: four-sample helper, result written to dest.
 * Only the loads, the mask_one step, the store and two register copies are
 * visible here. The arithmetic that combines the samples is not visible.
 * NOTE(review): the end of the signature and the loop structure are not
 * visible in this excerpt. */
233 static __inline__ void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First pair of samples: ref and its neighbour at ref+1. */
236 movq_m2r (*ref, mm0);
237 movq_m2r (*(ref+1), mm1);
/* Second pair, loaded into mm2/mm3 (presumably after ref has advanced to the
 * next row -- the pointer update is not visible). */
244 movq_m2r (*ref, mm2);
247 movq_m2r (*(ref+1), mm3);
/* Mask mm7 with mask_one, subtract it from mm0, then store the result. */
259 pand_m2r (mask_one, mm7);
261 psubusb_r2r (mm7, mm0);
264 movq_r2m (mm0, *dest);
/* Register copies tagged "unroll !" by the author; presumably they carry
 * mm6 and mm2 over as mm7 and mm0 for the next iteration. */
267 movq_r2r (mm6, mm7); // unroll !
268 movq_r2r (mm2, mm0); // unroll !
/* MC_put4_16: four-sample helper, two registers wide, written to dest.
 * For each half, four neighbouring samples are loaded (ref, ref+1, ref+stride,
 * ref+stride+1), mm7 is masked with mask_one and subtracted from mm0, and mm0
 * is stored. The arithmetic between the loads and the mask step is not visible.
 * NOTE(review): the end of the signature and the row loop are not visible
 * in this excerpt. */
272 static __inline__ void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: samples at ref, ref+stride+1, ref+1, ref+stride. */
276 movq_m2r (*ref, mm0);
277 movq_m2r (*(ref+stride+1), mm1);
279 movq_m2r (*(ref+1), mm2);
281 movq_m2r (*(ref+stride), mm3);
290 pand_m2r (mask_one, mm7);
292 psubusb_r2r (mm7, mm0);
293 movq_r2m (mm0, *dest);
/* Second half: the same four samples shifted by 8, stored to dest+8. */
295 movq_m2r (*(ref+8), mm0);
296 movq_m2r (*(ref+stride+9), mm1);
298 movq_m2r (*(ref+9), mm2);
300 movq_m2r (*(ref+stride+8), mm3);
309 pand_m2r (mask_one, mm7);
311 psubusb_r2r (mm7, mm0);
313 movq_r2m (mm0, *(dest+8));
/* MC_avg4_8: four-sample helper whose result is merged with the existing dest.
 * Visible body: load the four neighbours (ref, ref+stride+1, ref+1,
 * ref+stride), mask mm7 with mask_one and subtract it from mm0, load the
 * current *dest into mm1, and finally store mm0 to *dest.
 * NOTE(review): the arithmetic between these steps (including how mm1 is
 * folded into mm0), the end of the signature and the row loop are not visible
 * in this excerpt. */
318 static __inline__ void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
322 movq_m2r (*ref, mm0);
323 movq_m2r (*(ref+stride+1), mm1);
325 movq_m2r (*(ref+1), mm2);
327 movq_m2r (*(ref+stride), mm3);
336 pand_m2r (mask_one, mm7);
338 psubusb_r2r (mm7, mm0);
/* Fetch the existing destination samples before overwriting them. */
339 movq_m2r (*dest, mm1);
342 movq_r2m (mm0, *dest);
/* MC_avg4_16: four-sample helper, two registers wide, merged with the
 * existing dest. Each half follows the same visible sequence: load four
 * neighbouring samples, mask mm7 with mask_one and subtract it from mm0, load
 * the current destination half into mm1, then store mm0 to the destination.
 * NOTE(review): the arithmetic between these steps, the end of the signature
 * and the row loop are not visible in this excerpt. */
347 static __inline__ void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: ref, ref+stride+1, ref+1, ref+stride -> dest. */
351 movq_m2r (*ref, mm0);
352 movq_m2r (*(ref+stride+1), mm1);
354 movq_m2r (*(ref+1), mm2);
356 movq_m2r (*(ref+stride), mm3);
365 pand_m2r (mask_one, mm7);
367 psubusb_r2r (mm7, mm0);
368 movq_m2r (*dest, mm1);
370 movq_r2m (mm0, *dest);
/* Second half: the same samples shifted by 8 -> dest+8. */
372 movq_m2r (*(ref+8), mm0);
373 movq_m2r (*(ref+stride+9), mm1);
375 movq_m2r (*(ref+9), mm2);
377 movq_m2r (*(ref+stride+8), mm3);
386 pand_m2r (mask_one, mm7);
388 psubusb_r2r (mm7, mm0);
389 movq_m2r (*(dest+8), mm1);
392 movq_r2m (mm0, *(dest+8));
/* MC_avg_16_mmxext: forwards to MC_avg1_16 with the CPU_MMXEXT selector.
 * The helper takes `height` first, so the arguments are reordered. */
397 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
398 int stride, int height)
400 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_8_mmxext: forwards to MC_avg1_8 with the CPU_MMXEXT selector. */
403 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
404 int stride, int height)
406 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_16_mmxext: forwards to MC_put1_16. No CPU selector is passed: the
 * plain copy helper does not use the pavg adaptation macros. */
409 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
410 int stride, int height)
412 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_mmxext: forwards to MC_put1_8. No CPU selector is passed. */
415 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
416 int stride, int height)
418 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_mmxext: forwards to MC_avg2_16 with offset 1 and CPU_MMXEXT,
 * so the helper pairs each ref sample with the one at ref+1. */
421 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
422 int stride, int height)
424 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_x8_mmxext: forwards to MC_avg2_8 with offset 1 and CPU_MMXEXT. */
427 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
428 int stride, int height)
430 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x16_mmxext: forwards to MC_put2_16 with offset 1 and CPU_MMXEXT. */
433 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
434 int stride, int height)
436 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x8_mmxext: forwards to MC_put2_8 with offset 1 and CPU_MMXEXT. */
439 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
440 int stride, int height)
442 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_y16_mmxext: forwards to MC_avg2_16 with offset = stride and
 * CPU_MMXEXT, so the helper pairs each ref sample with the one at ref+stride. */
445 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
446 int stride, int height)
448 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_y8_mmxext: forwards to MC_avg2_8 with offset = stride and CPU_MMXEXT. */
451 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
452 int stride, int height)
454 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y16_mmxext: forwards to MC_put2_16 with offset = stride and CPU_MMXEXT. */
457 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
458 int stride, int height)
460 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y8_mmxext: forwards to MC_put2_8 with offset = stride and CPU_MMXEXT. */
463 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
464 int stride, int height)
466 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_xy16_mmxext: forwards to the four-sample helper MC_avg4_16 with
 * CPU_MMXEXT. */
469 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
470 int stride, int height)
472 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_xy8_mmxext: forwards to the four-sample helper MC_avg4_8 with
 * CPU_MMXEXT. */
475 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
476 int stride, int height)
478 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy16_mmxext: forwards to the four-sample helper MC_put4_16 with
 * CPU_MMXEXT. */
481 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
482 int stride, int height)
484 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy8_mmxext: forwards to the four-sample helper MC_put4_8 with
 * CPU_MMXEXT. */
487 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
488 int stride, int height)
490 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_16_3dnow: forwards to MC_avg1_16 with the CPU_3DNOW selector, which
 * makes the pavg macros take their non-MMXEXT path.
 * NOTE(review): no reference to this function is visible in the dispatch
 * table lines shown in this excerpt. */
494 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
495 int stride, int height)
497 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_avg_8_3dnow: forwards to MC_avg1_8 with the CPU_3DNOW selector. */
500 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
501 int stride, int height)
503 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_16_3dnow: forwards to MC_put1_16. The call is identical to the one
 * in MC_put_16_mmxext because the copy helper takes no CPU selector. */
506 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
507 int stride, int height)
509 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_3dnow: forwards to MC_put1_8. The call is identical to the one
 * in MC_put_8_mmxext because the copy helper takes no CPU selector. */
512 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
513 int stride, int height)
515 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_3dnow: forwards to MC_avg2_16 with offset 1 and CPU_3DNOW. */
518 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
519 int stride, int height)
521 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_x8_3dnow: forwards to MC_avg2_8 with offset 1 and CPU_3DNOW. */
524 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
525 int stride, int height)
527 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x16_3dnow: forwards to MC_put2_16 with offset 1 and CPU_3DNOW. */
530 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
531 int stride, int height)
533 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x8_3dnow: forwards to MC_put2_8 with offset 1 and CPU_3DNOW. */
536 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
537 int stride, int height)
539 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_y16_3dnow: forwards to MC_avg2_16 with offset = stride and CPU_3DNOW. */
542 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
543 int stride, int height)
545 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_avg_y8_3dnow: forwards to MC_avg2_8 with offset = stride and CPU_3DNOW. */
548 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
549 int stride, int height)
551 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y16_3dnow: forwards to MC_put2_16 with offset = stride and CPU_3DNOW. */
554 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
555 int stride, int height)
557 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y8_3dnow: forwards to MC_put2_8 with offset = stride and CPU_3DNOW. */
560 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
561 int stride, int height)
563 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
566 /*****************************************************************************
567 * Functions exported as capabilities. They are declared as static so that
568 * we don't pollute the namespace too much.
569 *****************************************************************************/
/* motion_getfunctions: fills *p_function_list with this module's entry points.
 * It sets the probe callback and copies a static [2][2][4] table of
 * motion-compensation function pointers into the list.
 * The visible table rows hold the put ("copying") functions first, then the
 * avg ("averaging") ones. Each group has a 16-wide row and an 8-wide row, and
 * each row is ordered plain, x, y, xy.
 * NOTE(review): only the _mmxext wrappers appear in the visible rows; the
 * _3dnow wrappers are not referenced in this excerpt.
 * NOTE(review): parts of the table declaration and initialiser are not
 * visible in this excerpt. */
570 static void motion_getfunctions( function_list_t * p_function_list )
572 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
576 /* Copying functions */
579 MC_put_16_mmxext, MC_put_x16_mmxext, MC_put_y16_mmxext, MC_put_xy16_mmxext
583 MC_put_8_mmxext, MC_put_x8_mmxext, MC_put_y8_mmxext, MC_put_xy8_mmxext
587 /* Averaging functions */
590 MC_avg_16_mmxext, MC_avg_x16_mmxext, MC_avg_y16_mmxext, MC_avg_xy16_mmxext
594 MC_avg_8_mmxext, MC_avg_x8_mmxext, MC_avg_y8_mmxext, MC_avg_xy8_mmxext
599 p_function_list->pf_probe = motion_Probe;
601 #define list p_function_list->functions.motion
/* Copy all 2*2*4 = 16 entries in one go.
 * NOTE(review): the size is spelled as sizeof( void * ) * 16, which assumes a
 * function pointer has the same size as void * and hard-codes the table
 * dimensions; sizeof( ppppf_motion ) would track the declaration. Also, no
 * #undef for `list` is visible after this line -- confirm in the full source. */
602 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );