1 /*****************************************************************************
2 * motionmmxext.c : MMX EXT motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionmmxext.c,v 1.15 2001/12/30 07:09:55 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
31 #include <videolan/vlc.h>
35 /*****************************************************************************
36 * Local and extern prototypes.
37 *****************************************************************************/
38 static void motion_getfunctions( function_list_t * p_function_list );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/* Module descriptor entries.
 * NOTE(review): the enclosing MODULE_* start/stop macros are not visible in
 * this excerpt; the notes below cover only the entries that are. */
47 SET_DESCRIPTION( "MMXEXT motion compensation module" )
48 ADD_CAPABILITY( MOTION, 200 ) /* NOTE(review): 200 is presumably this module's score for MOTION - confirm */
49 ADD_REQUIREMENT( MMXEXT )
50 ADD_SHORTCUT( "mmxext" )
51 ADD_SHORTCUT( "motionmmxext" )
55 motion_getfunctions( &p_module->p_functions->motion ); /* hands the module's motion slot to motion_getfunctions() */
58 MODULE_DEACTIVATE_START
59 MODULE_DEACTIVATE_STOP /* STOP directly follows START: no deactivation work */
61 /*****************************************************************************
62 * motion_Probe: probes the CPU and returns a score
63 *****************************************************************************/
/* NOTE(review): only the signature is visible in this excerpt; how p_data is
 * used and which value is returned cannot be documented from here. */
64 static int motion_Probe( probedata_t *p_data )
69 /*****************************************************************************
70 * Motion compensation in MMXEXT (OK I know this does 3DNow too and it's ugly)
71 *****************************************************************************/
77 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/*
 * pavg_r2r: register-to-register packed average, selected by CPU flavour.
 * Uses pavgb_r2r when `cpu` == CPU_MMXEXT; pavgusb_r2r is presumably the
 * else branch (the `else` line and the statement wrapper are not visible in
 * this excerpt - confirm).
 * `cpu` is not a macro parameter: it is picked up from the scope in which
 * the macro is expanded, so every user needs a `cpu` identifier in scope.
 */
79 #define pavg_r2r(src,dest) \
81 if (cpu == CPU_MMXEXT) \
82 pavgb_r2r (src, dest); \
84 pavgusb_r2r (src, dest); \
/*
 * pavg_m2r: memory-to-register counterpart of pavg_r2r.
 * Uses pavgb_m2r when `cpu` == CPU_MMXEXT; pavgusb_m2r is presumably the
 * else branch (the `else` line is not visible in this excerpt - confirm).
 * As with pavg_r2r, `cpu` comes from the expansion scope, not from the
 * macro's parameter list.
 */
87 #define pavg_m2r(src,dest) \
89 if (cpu == CPU_MMXEXT) \
90 pavgb_m2r (src, dest); \
92 pavgusb_m2r (src, dest); \
/*
 * MC_put1_8: plain copy helper, one quadword per step.
 * Visible work: load the quadword at *ref into mm0, store mm0 to *dest.
 * NOTE(review): the rest of the parameter list and the surrounding loop are
 * not visible in this excerpt. Callers in this file pass
 * (height, dest, ref, stride); presumably the copy repeats for `height`
 * rows with both pointers advanced by `stride` - confirm.
 */
99 static __inline__ void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
103 movq_m2r (*ref, mm0);
104 movq_r2m (mm0, *dest);
/*
 * MC_put1_16: plain copy helper, two quadwords per step.
 * Visible work: load *ref and *(ref+8) into mm0/mm1, store them to *dest
 * and *(dest+8).
 * NOTE(review): the rest of the parameter list and the surrounding loop are
 * not visible in this excerpt; callers pass (height, dest, ref, stride).
 * The +8 offsets cover the second quadword assuming yuv_data_t is one byte
 * wide - confirm.
 */
110 static __inline__ void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
114 movq_m2r (*ref, mm0);
115 movq_m2r (*(ref+8), mm1);
117 movq_r2m (mm0, *dest);
118 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg1_8: average helper, one quadword per step.
 * Visible work: load *ref into mm0, average it with the current contents of
 * *dest (pavg_m2r), store the result back to *dest.
 * NOTE(review): the rest of the parameter list and the surrounding loop are
 * not visible in this excerpt. pavg_m2r needs a `cpu` identifier in scope;
 * callers pass a CPU_* constant as the fifth argument, which is presumably
 * that parameter - confirm.
 */
123 static __inline__ void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
127 movq_m2r (*ref, mm0);
128 pavg_m2r (*dest, mm0);
130 movq_r2m (mm0, *dest);
/*
 * MC_avg1_16: average helper, two quadwords per step.
 * Visible work: load *ref and *(ref+8) into mm0/mm1, average each with the
 * matching quadword already in dest (pavg_m2r), store both back to dest.
 * NOTE(review): the rest of the parameter list and the surrounding loop are
 * not visible in this excerpt. pavg_m2r needs a `cpu` identifier in scope;
 * callers pass a CPU_* constant as the fifth argument, which is presumably
 * that parameter - confirm.
 */
135 static __inline__ void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
139 movq_m2r (*ref, mm0);
140 movq_m2r (*(ref+8), mm1);
141 pavg_m2r (*dest, mm0);
142 pavg_m2r (*(dest+8), mm1);
143 movq_r2m (mm0, *dest);
145 movq_r2m (mm1, *(dest+8));
/*
 * MC_put2_8: two-point interpolation, one quadword per step.
 * Visible work: load *ref into mm0, average it with *(ref+offset), store
 * the result to *dest (dest's previous contents are not read).
 *   offset - distance, in yuv_data_t elements, to the second sample; callers
 *            in this file pass 1 or `stride`.
 *   cpu    - consumed by the pavg_m2r macro to pick the instruction.
 * NOTE(review): the surrounding loop is not visible in this excerpt.
 */
150 static __inline__ void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
151 int stride, int offset, int cpu)
154 movq_m2r (*ref, mm0);
155 pavg_m2r (*(ref+offset), mm0);
157 movq_r2m (mm0, *dest);
/*
 * MC_put2_16: two-point interpolation, two quadwords per step.
 * Visible work: load *ref and *(ref+8) into mm0/mm1, average each with the
 * quadword `offset` elements further on, store both results to dest.
 *   offset - distance, in yuv_data_t elements, to the second sample; callers
 *            in this file pass 1 or `stride`.
 *   cpu    - consumed by the pavg_m2r macro to pick the instruction.
 * NOTE(review): the surrounding loop is not visible in this excerpt.
 */
162 static __inline__ void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
163 int stride, int offset, int cpu)
166 movq_m2r (*ref, mm0);
167 movq_m2r (*(ref+8), mm1);
168 pavg_m2r (*(ref+offset), mm0);
169 pavg_m2r (*(ref+offset+8), mm1);
170 movq_r2m (mm0, *dest);
172 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg2_8: two-point interpolation averaged into dest, one quadword per
 * step.
 * Visible work: load *ref into mm0, average with *(ref+offset), average
 * that result with the current *dest, store back to *dest.
 *   offset - distance, in yuv_data_t elements, to the second sample; callers
 *            in this file pass 1 or `stride`.
 *   cpu    - consumed by the pavg_m2r macro to pick the instruction.
 * NOTE(review): the surrounding loop is not visible in this excerpt.
 */
177 static __inline__ void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
178 int stride, int offset, int cpu)
181 movq_m2r (*ref, mm0);
182 pavg_m2r (*(ref+offset), mm0);
183 pavg_m2r (*dest, mm0);
185 movq_r2m (mm0, *dest);
/*
 * MC_avg2_16: two-point interpolation averaged into dest, two quadwords per
 * step.
 * Visible work: load *ref and *(ref+8) into mm0/mm1, average each with the
 * quadword `offset` elements further on, average those results with the
 * matching quadwords already in dest, store both back to dest.
 *   offset - distance, in yuv_data_t elements, to the second sample; callers
 *            in this file pass 1 or `stride`.
 *   cpu    - consumed by the pavg_m2r macro to pick the instruction.
 * NOTE(review): the surrounding loop is not visible in this excerpt.
 */
190 static __inline__ void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
191 int stride, int offset, int cpu)
194 movq_m2r (*ref, mm0);
195 movq_m2r (*(ref+8), mm1);
196 pavg_m2r (*(ref+offset), mm0);
197 pavg_m2r (*(ref+offset+8), mm1);
198 pavg_m2r (*dest, mm0);
199 pavg_m2r (*(dest+8), mm1);
201 movq_r2m (mm0, *dest);
202 movq_r2m (mm1, *(dest+8));
/* 0x01 in each of the eight bytes. The MC_put4_* and MC_avg4_* helpers AND
 * it into mm7 (pand_m2r) just before their saturating subtract. */
207 static mmx_t mask_one = {0x0101010101010101LL};
/*
 * MC_put4_8: four-point interpolation helper, one quadword per step.
 * Only part of the body is visible in this excerpt. What can be read:
 *   - ref and ref+1 are loaded into mm0/mm1, and later ref and ref+1 are
 *     loaded again into mm2/mm3 (presumably after ref has been advanced to
 *     the next row; the pointer update is not visible - confirm);
 *   - mm7 is masked with mask_one, keeping bit 0 of every byte, and the
 *     result is subtracted from mm0 with unsigned saturation;
 *   - mm0 is stored to *dest;
 *   - mm6 -> mm7 and mm2 -> mm0 copies, tagged "unroll !", follow the store.
 * NOTE(review): the steps that compute mm6/mm7 and the averages are on lines
 * not shown here, so the exact rounding behaviour cannot be documented.
 * Callers pass (height, dest, ref, stride, CPU_MMXEXT).
 */
209 static __inline__ void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
212 movq_m2r (*ref, mm0);
213 movq_m2r (*(ref+1), mm1);
220 movq_m2r (*ref, mm2);
223 movq_m2r (*(ref+1), mm3);
235 pand_m2r (mask_one, mm7);
237 psubusb_r2r (mm7, mm0);
240 movq_r2m (mm0, *dest);
243 movq_r2r (mm6, mm7); // unroll !
244 movq_r2r (mm2, mm0); // unroll !
/*
 * MC_put4_16: four-point interpolation helper, two quadwords per step.
 * Only part of the body is visible in this excerpt. What can be read, done
 * once for the first quadword and once more at an offset of 8 elements:
 *   - the four neighbours ref, ref+stride+1, ref+1 and ref+stride are
 *     loaded into mm0..mm3;
 *   - mm7 is masked with mask_one, keeping bit 0 of every byte, and the
 *     result is subtracted from mm0 with unsigned saturation;
 *   - mm0 is stored to dest (then dest+8).
 * NOTE(review): the steps that combine mm0..mm3 and compute mm7 are on
 * lines not shown here, as is the surrounding loop. Callers pass
 * (height, dest, ref, stride, CPU_MMXEXT).
 */
248 static __inline__ void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
252 movq_m2r (*ref, mm0);
253 movq_m2r (*(ref+stride+1), mm1);
255 movq_m2r (*(ref+1), mm2);
257 movq_m2r (*(ref+stride), mm3);
266 pand_m2r (mask_one, mm7);
268 psubusb_r2r (mm7, mm0);
269 movq_r2m (mm0, *dest);
271 movq_m2r (*(ref+8), mm0);
272 movq_m2r (*(ref+stride+9), mm1);
274 movq_m2r (*(ref+9), mm2);
276 movq_m2r (*(ref+stride+8), mm3);
285 pand_m2r (mask_one, mm7);
287 psubusb_r2r (mm7, mm0);
289 movq_r2m (mm0, *(dest+8));
/*
 * MC_avg4_8: four-point interpolation averaged into dest, one quadword per
 * step.
 * Only part of the body is visible in this excerpt. What can be read:
 *   - the four neighbours ref, ref+stride+1, ref+1 and ref+stride are
 *     loaded into mm0..mm3;
 *   - mm7 is masked with mask_one, keeping bit 0 of every byte, and the
 *     result is subtracted from mm0 with unsigned saturation;
 *   - the current *dest is loaded into mm1, and mm0 is stored to *dest.
 * NOTE(review): the step that merges mm1 (old dest) into mm0 sits between
 * the load and the store on lines not shown here; presumably a pavg -
 * confirm. Callers pass (height, dest, ref, stride, CPU_MMXEXT).
 */
294 static __inline__ void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
298 movq_m2r (*ref, mm0);
299 movq_m2r (*(ref+stride+1), mm1);
301 movq_m2r (*(ref+1), mm2);
303 movq_m2r (*(ref+stride), mm3);
312 pand_m2r (mask_one, mm7);
314 psubusb_r2r (mm7, mm0);
315 movq_m2r (*dest, mm1);
318 movq_r2m (mm0, *dest);
/*
 * MC_avg4_16: four-point interpolation averaged into dest, two quadwords
 * per step.
 * Only part of the body is visible in this excerpt. What can be read, done
 * once for the first quadword and once more at an offset of 8 elements:
 *   - the four neighbours ref, ref+stride+1, ref+1 and ref+stride are
 *     loaded into mm0..mm3;
 *   - mm7 is masked with mask_one, keeping bit 0 of every byte, and the
 *     result is subtracted from mm0 with unsigned saturation;
 *   - the current dest quadword is loaded into mm1, and mm0 is stored back
 *     to the same dest location.
 * NOTE(review): the step that merges mm1 (old dest) into mm0 is on lines
 * not shown here; presumably a pavg - confirm. Callers pass
 * (height, dest, ref, stride, CPU_MMXEXT).
 */
323 static __inline__ void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
327 movq_m2r (*ref, mm0);
328 movq_m2r (*(ref+stride+1), mm1);
330 movq_m2r (*(ref+1), mm2);
332 movq_m2r (*(ref+stride), mm3);
341 pand_m2r (mask_one, mm7);
343 psubusb_r2r (mm7, mm0);
344 movq_m2r (*dest, mm1);
346 movq_r2m (mm0, *dest);
348 movq_m2r (*(ref+8), mm0);
349 movq_m2r (*(ref+stride+9), mm1);
351 movq_m2r (*(ref+9), mm2);
353 movq_m2r (*(ref+stride+8), mm3);
362 pand_m2r (mask_one, mm7);
364 psubusb_r2r (mm7, mm0);
365 movq_m2r (*(dest+8), mm1);
368 movq_r2m (mm0, *(dest+8));
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_avg1_16 with CPU_MMXEXT as the trailing selector argument. */
373 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
374 int stride, int height)
376 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_avg1_8 with CPU_MMXEXT as the trailing selector argument. */
379 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
380 int stride, int height)
382 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_put1_16; no CPU selector is passed to the plain copy helper. */
385 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
386 int stride, int height)
388 MC_put1_16 (height, dest, ref, stride);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_put1_8; no CPU selector is passed to the plain copy helper. */
391 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
392 int stride, int height)
394 MC_put1_8 (height, dest, ref, stride);
/* MMXEXT entry point: forwards to MC_avg2_16 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_MMXEXT. */
397 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
398 int stride, int height)
400 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_8 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_MMXEXT. */
403 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
404 int stride, int height)
406 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_16 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_MMXEXT. */
409 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
410 int stride, int height)
412 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_8 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_MMXEXT. */
415 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
416 int stride, int height)
418 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_16 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_MMXEXT. */
421 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
422 int stride, int height)
424 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_8 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_MMXEXT. */
427 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
428 int stride, int height)
430 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_16 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_MMXEXT. */
433 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
434 int stride, int height)
436 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_8 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_MMXEXT. */
439 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
440 int stride, int height)
442 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * the four-point helper MC_avg4_16 with CPU_MMXEXT as the last argument. */
445 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
446 int stride, int height)
448 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * the four-point helper MC_avg4_8 with CPU_MMXEXT as the last argument. */
451 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
452 int stride, int height)
454 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * the four-point helper MC_put4_16 with CPU_MMXEXT as the last argument. */
457 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
458 int stride, int height)
460 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: reorders (dest, ref, stride, height) and forwards to
 * the four-point helper MC_put4_8 with CPU_MMXEXT as the last argument. */
463 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
464 int stride, int height)
466 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* 3DNow! entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_avg1_16 with CPU_3DNOW as the trailing selector argument. */
470 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
471 int stride, int height)
473 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! entry point: reorders (dest, ref, stride, height) and forwards to
 * MC_avg1_8 with CPU_3DNOW as the trailing selector argument. */
476 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
477 int stride, int height)
479 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_put1_16. No CPU selector is passed, so
 * the call is the same as the one made by MC_put_16_mmxext. */
482 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
483 int stride, int height)
485 MC_put1_16 (height, dest, ref, stride);
/* 3DNow! entry point: forwards to MC_put1_8. No CPU selector is passed, so
 * the call is the same as the one made by MC_put_8_mmxext. */
488 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
489 int stride, int height)
491 MC_put1_8 (height, dest, ref, stride);
/* 3DNow! entry point: forwards to MC_avg2_16 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_3DNOW. */
494 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
495 int stride, int height)
497 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_avg2_8 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_3DNOW. */
500 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
501 int stride, int height)
503 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_put2_16 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_3DNOW. */
506 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
507 int stride, int height)
509 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_put2_8 with offset 1 (second sample
 * is the next element after ref) and cpu = CPU_3DNOW. */
512 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
513 int stride, int height)
515 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_avg2_16 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_3DNOW. */
518 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
519 int stride, int height)
521 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_avg2_8 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_3DNOW. */
524 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
525 int stride, int height)
527 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_put2_16 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_3DNOW. */
530 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
531 int stride, int height)
533 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3DNow! entry point: forwards to MC_put2_8 with offset = stride (second
 * sample is `stride` elements after ref) and cpu = CPU_3DNOW. */
536 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
537 int stride, int height)
539 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
542 /*****************************************************************************
543 * Functions exported as capabilities. They are declared as static so that
544 * we don't pollute the namespace too much.
545 *****************************************************************************/
/*
 * motion_getfunctions: fills the caller's function list with this module's
 * probe callback and its table of motion-compensation routines.
 * Table layout, as read from the initialiser order:
 *   [0] copying ("put") / [1] averaging ("avg")
 *   [0] the *16* routines / [1] the *8* routines
 *   [0..3] plain, x, y, xy variants
 * NOTE(review): only the *_mmxext routines appear in the visible table
 * rows; the *_3dnow wrappers defined in this file are not referenced in
 * the lines shown here. The end of the function is not visible in this
 * excerpt.
 */
546 static void motion_getfunctions( function_list_t * p_function_list )
548 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *, /* 2 x 2 x 4 = 16 entries */
552 /* Copying functions */
555 MC_put_16_mmxext, MC_put_x16_mmxext, MC_put_y16_mmxext, MC_put_xy16_mmxext
559 MC_put_8_mmxext, MC_put_x8_mmxext, MC_put_y8_mmxext, MC_put_xy8_mmxext
563 /* Averaging functions */
566 MC_avg_16_mmxext, MC_avg_x16_mmxext, MC_avg_y16_mmxext, MC_avg_xy16_mmxext
570 MC_avg_8_mmxext, MC_avg_x8_mmxext, MC_avg_y8_mmxext, MC_avg_xy8_mmxext
575 p_function_list->pf_probe = motion_Probe;
577 #define list p_function_list->functions.motion
/* NOTE(review): the copy size hard-codes 16 (== 2*2*4 table entries) and
 * sizes each entry as a void *, although the entries are function pointers.
 * sizeof( ppppf_motion ) would track the table automatically and avoid
 * assuming both pointer kinds have the same size. Also confirm that
 * <string.h> is included for memcpy; it is not among the includes visible
 * in this excerpt. */
578 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );