1 /*****************************************************************************
2 * motion3dnow.c : 3DNow! motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motion3dnow.c,v 1.6 2001/12/30 07:09:55 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
31 #include <videolan/vlc.h>
35 /*****************************************************************************
36 * Local and extern prototypes.
37 *****************************************************************************/
/* Forward declaration: motion_getfunctions is called from the module
 * activation code below, ahead of its definition later in this file. */
38 static void motion_getfunctions( function_list_t * p_function_list );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/* Module descriptor entries: the description string, the MOTION capability
 * (second argument 150 -- presumably the module's score/priority; confirm
 * against the module macro definitions), a 3DNOW requirement, and the two
 * shortcut names "3dnow" and "motion3dn". */
47 SET_DESCRIPTION( "3DNow! motion compensation module" )
48 ADD_CAPABILITY( MOTION, 150 )
49 ADD_REQUIREMENT( 3DNOW )
51 ADD_SHORTCUT( "3dnow" )
52 ADD_SHORTCUT( "motion3dn" )
/* Fill the motion member of this module's function table. */
56 motion_getfunctions( &p_module->p_functions->motion );
/* Nothing appears between the deactivation START and STOP markers. */
59 MODULE_DEACTIVATE_START
60 MODULE_DEACTIVATE_STOP
62 /*****************************************************************************
63 * motion_Probe: probe the CPU and return a score.
 * Installed as p_function_list->pf_probe by motion_getfunctions().
64 *****************************************************************************/
65 static int motion_Probe( probedata_t *p_data )
70 /*****************************************************************************
71 * Motion compensation in 3DNow (OK I know this does MMXEXT too and it's ugly)
72 *****************************************************************************/
78 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/*
 * pavg_r2r / pavg_m2r: choose the averaging primitive at the point of use.
 * Both expansions test a variable named `cpu`, so a `cpu` must be in scope
 * wherever they are used. cpu == CPU_MMXEXT selects pavgb_r2r / pavgb_m2r;
 * pavgusb_r2r / pavgusb_m2r is the other arm.
 * NOTE(review): the lines that join the two arms (presumably an `else` and an
 * enclosing statement wrapper) are not visible in this excerpt -- confirm
 * against the full file.
 */
80 #define pavg_r2r(src,dest) \
82 if (cpu == CPU_MMXEXT) \
83 pavgb_r2r (src, dest); \
85 pavgusb_r2r (src, dest); \
88 #define pavg_m2r(src,dest) \
90 if (cpu == CPU_MMXEXT) \
91 pavgb_m2r (src, dest); \
93 pavgusb_m2r (src, dest); \
/*
 * MC_put1_8: plain copy step. The visible body moves *ref into mm0
 * (movq_m2r) and mm0 out to *dest (movq_r2m). Callers in this file pass
 * (height, dest, ref, stride).
 * NOTE(review): the end of the parameter list and whatever iterates over
 * `height` / advances the pointers is not visible in this excerpt -- confirm.
 */
100 static __inline__ void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
104 movq_m2r (*ref, mm0);
105 movq_r2m (mm0, *dest);
/*
 * MC_put1_16: plain copy step done as two transfers. *ref and *(ref+8) are
 * moved into mm0 and mm1, then written to *dest and *(dest+8). Callers in
 * this file pass (height, dest, ref, stride).
 * NOTE(review): the end of the parameter list and whatever iterates over
 * `height` / advances the pointers is not visible in this excerpt -- confirm.
 */
111 static __inline__ void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
115 movq_m2r (*ref, mm0);
116 movq_m2r (*(ref+8), mm1);
118 movq_r2m (mm0, *dest);
119 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg1_8: average ref into the existing contents of dest. The visible
 * body loads *ref into mm0, applies pavg_m2r with *dest as the memory operand
 * (the macro picks its primitive from `cpu`), and stores mm0 back to *dest.
 * Callers in this file pass (height, dest, ref, stride, CPU_xxx).
 * NOTE(review): the end of the parameter list and whatever iterates over
 * `height` / advances the pointers is not visible in this excerpt -- confirm.
 */
124 static __inline__ void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
128 movq_m2r (*ref, mm0);
129 pavg_m2r (*dest, mm0);
131 movq_r2m (mm0, *dest);
/*
 * MC_avg1_16: average ref into the existing contents of dest, handled as two
 * halves at offsets 0 and 8. Each half is loaded from ref (mm0 / mm1),
 * combined with the matching dest half through pavg_m2r, and stored back.
 * Callers in this file pass (height, dest, ref, stride, CPU_xxx).
 * NOTE(review): the end of the parameter list and whatever iterates over
 * `height` / advances the pointers is not visible in this excerpt -- confirm.
 */
136 static __inline__ void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
140 movq_m2r (*ref, mm0);
141 movq_m2r (*(ref+8), mm1);
142 pavg_m2r (*dest, mm0);
143 pavg_m2r (*(dest+8), mm1);
144 movq_r2m (mm0, *dest);
146 movq_r2m (mm1, *(dest+8));
/*
 * MC_put2_8: two-sample interpolation written to dest. The visible body
 * loads *ref into mm0, combines it with *(ref+offset) through pavg_m2r, and
 * stores the result to *dest. Callers in this file pass offset = 1 for the
 * "x" variants and offset = stride for the "y" variants; `cpu` feeds the
 * pavg_m2r macro.
 * NOTE(review): the loop over `height` and the pointer advance are not
 * visible in this excerpt -- confirm.
 */
151 static __inline__ void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
152 int stride, int offset, int cpu)
155 movq_m2r (*ref, mm0);
156 pavg_m2r (*(ref+offset), mm0);
158 movq_r2m (mm0, *dest);
/*
 * MC_put2_16: two-sample interpolation written to dest, handled as two halves
 * at offsets 0 and 8. Each half of ref (mm0 / mm1) is combined with the half
 * at ref+offset through pavg_m2r and stored to dest / dest+8. Callers in this
 * file pass offset = 1 for the "x" variants and offset = stride for the "y"
 * variants; `cpu` feeds the pavg_m2r macro.
 * NOTE(review): the loop over `height` and the pointer advance are not
 * visible in this excerpt -- confirm.
 */
163 static __inline__ void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
164 int stride, int offset, int cpu)
167 movq_m2r (*ref, mm0);
168 movq_m2r (*(ref+8), mm1);
169 pavg_m2r (*(ref+offset), mm0);
170 pavg_m2r (*(ref+offset+8), mm1);
171 movq_r2m (mm0, *dest);
173 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg2_8: two-sample interpolation that is then averaged into dest. The
 * visible body loads *ref into mm0, applies pavg_m2r with *(ref+offset),
 * applies pavg_m2r again with the current *dest, and stores mm0 to *dest.
 * Callers in this file pass offset = 1 ("x") or offset = stride ("y"); `cpu`
 * feeds the pavg_m2r macro.
 * NOTE(review): the loop over `height` and the pointer advance are not
 * visible in this excerpt -- confirm.
 */
178 static __inline__ void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
179 int stride, int offset, int cpu)
182 movq_m2r (*ref, mm0);
183 pavg_m2r (*(ref+offset), mm0);
184 pavg_m2r (*dest, mm0);
186 movq_r2m (mm0, *dest);
/*
 * MC_avg2_16: two-sample interpolation that is then averaged into dest,
 * handled as two halves at offsets 0 and 8. Each ref half (mm0 / mm1) goes
 * through pavg_m2r with the half at ref+offset, then through pavg_m2r with
 * the matching dest half, and is stored back to dest / dest+8. Callers in
 * this file pass offset = 1 ("x") or offset = stride ("y"); `cpu` feeds the
 * pavg_m2r macro.
 * NOTE(review): the loop over `height` and the pointer advance are not
 * visible in this excerpt -- confirm.
 */
191 static __inline__ void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
192 int stride, int offset, int cpu)
195 movq_m2r (*ref, mm0);
196 movq_m2r (*(ref+8), mm1);
197 pavg_m2r (*(ref+offset), mm0);
198 pavg_m2r (*(ref+offset+8), mm1);
199 pavg_m2r (*dest, mm0);
200 pavg_m2r (*(dest+8), mm1);
202 movq_r2m (mm0, *dest);
203 movq_r2m (mm1, *(dest+8));
/* Constant with 0x01 in each of its eight bytes; the MC_put4_* / MC_avg4_*
 * routines AND it into mm7 (pand_m2r) before the psubusb on mm0. */
208 static mmx_t mask_one = {0x0101010101010101LL};
/*
 * MC_put4_8: four-sample ("xy") interpolation written to dest.
 * Visible steps: *ref and *(ref+1) are loaded into mm0 / mm1, then *ref and
 * *(ref+1) are loaded again into mm2 / mm3; mm7 is masked with mask_one and
 * subtracted from mm0 (psubusb) before mm0 is stored to *dest; finally mm6 is
 * copied to mm7 and mm2 to mm0 (the "unroll !" moves).
 * NOTE(review): the lines between these steps are not visible in this
 * excerpt. The second pair of loads presumably follows a ref advance, and the
 * mask_one/psubusb pair looks like a rounding correction for cascaded
 * averages -- confirm against the full file.
 */
210 static __inline__ void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
213 movq_m2r (*ref, mm0);
214 movq_m2r (*(ref+1), mm1);
221 movq_m2r (*ref, mm2);
224 movq_m2r (*(ref+1), mm3);
236 pand_m2r (mask_one, mm7);
238 psubusb_r2r (mm7, mm0);
241 movq_r2m (mm0, *dest);
244 movq_r2r (mm6, mm7); // unroll !
245 movq_r2r (mm2, mm0); // unroll !
/*
 * MC_put4_16: four-sample ("xy") interpolation written to dest, handled as
 * two halves. For the first half the visible body loads *ref, *(ref+1),
 * *(ref+stride) and *(ref+stride+1) into mm0, mm2, mm3 and mm1; masks mm7
 * with mask_one; subtracts mm7 from mm0 (psubusb); and stores mm0 to *dest.
 * The second half repeats this at ref+8 / dest+8.
 * NOTE(review): the instructions that combine the four loads and fill mm7
 * are not visible in this excerpt; the mask_one/psubusb pair looks like a
 * rounding correction for cascaded averages -- confirm against the full file.
 */
249 static __inline__ void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
253 movq_m2r (*ref, mm0);
254 movq_m2r (*(ref+stride+1), mm1);
256 movq_m2r (*(ref+1), mm2);
258 movq_m2r (*(ref+stride), mm3);
267 pand_m2r (mask_one, mm7);
269 psubusb_r2r (mm7, mm0);
270 movq_r2m (mm0, *dest);
272 movq_m2r (*(ref+8), mm0);
273 movq_m2r (*(ref+stride+9), mm1);
275 movq_m2r (*(ref+9), mm2);
277 movq_m2r (*(ref+stride+8), mm3);
286 pand_m2r (mask_one, mm7);
288 psubusb_r2r (mm7, mm0);
290 movq_r2m (mm0, *(dest+8));
/*
 * MC_avg4_8: four-sample ("xy") interpolation combined with dest.
 * Visible steps: *ref, *(ref+1), *(ref+stride) and *(ref+stride+1) are loaded
 * into mm0, mm2, mm3 and mm1; mm7 is masked with mask_one and subtracted from
 * mm0 (psubusb); the current *dest is loaded into mm1; mm0 is stored to
 * *dest.
 * NOTE(review): the instructions that combine the loads, fill mm7, and merge
 * mm1 (the old dest) into mm0 are not visible in this excerpt -- confirm
 * against the full file.
 */
295 static __inline__ void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
299 movq_m2r (*ref, mm0);
300 movq_m2r (*(ref+stride+1), mm1);
302 movq_m2r (*(ref+1), mm2);
304 movq_m2r (*(ref+stride), mm3);
313 pand_m2r (mask_one, mm7);
315 psubusb_r2r (mm7, mm0);
316 movq_m2r (*dest, mm1);
319 movq_r2m (mm0, *dest);
/*
 * MC_avg4_16: four-sample ("xy") interpolation combined with dest, handled
 * as two halves. For the first half the visible body loads *ref, *(ref+1),
 * *(ref+stride) and *(ref+stride+1) into mm0, mm2, mm3 and mm1; masks mm7
 * with mask_one; subtracts mm7 from mm0 (psubusb); loads the current *dest
 * into mm1; and stores mm0 to *dest. The second half repeats this at
 * ref+8 / dest+8.
 * NOTE(review): the instructions that combine the loads, fill mm7, and merge
 * mm1 (the old dest) into mm0 are not visible in this excerpt -- confirm
 * against the full file.
 */
324 static __inline__ void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
328 movq_m2r (*ref, mm0);
329 movq_m2r (*(ref+stride+1), mm1);
331 movq_m2r (*(ref+1), mm2);
333 movq_m2r (*(ref+stride), mm3);
342 pand_m2r (mask_one, mm7);
344 psubusb_r2r (mm7, mm0);
345 movq_m2r (*dest, mm1);
347 movq_r2m (mm0, *dest);
349 movq_m2r (*(ref+8), mm0);
350 movq_m2r (*(ref+stride+9), mm1);
352 movq_m2r (*(ref+9), mm2);
354 movq_m2r (*(ref+stride+8), mm3);
363 pand_m2r (mask_one, mm7);
365 psubusb_r2r (mm7, mm0);
366 movq_m2r (*(dest+8), mm1);
369 movq_r2m (mm0, *(dest+8));
/* MC_avg_16_mmxext: (dest, ref, stride, height) entry point that forwards to
 * MC_avg1_16 as (height, dest, ref, stride, CPU_MMXEXT). Not listed in the
 * ppppf_motion table built by motion_getfunctions. */
374 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
375 int stride, int height)
377 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_8_mmxext: (dest, ref, stride, height) entry point that forwards to
 * MC_avg1_8 as (height, dest, ref, stride, CPU_MMXEXT). Not listed in the
 * ppppf_motion table built by motion_getfunctions. */
380 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
381 int stride, int height)
383 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_16_mmxext: forwards to MC_put1_16 as (height, dest, ref, stride).
 * No CPU selector is passed, so the call is the same as in MC_put_16_3dnow.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
386 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
387 int stride, int height)
389 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_mmxext: forwards to MC_put1_8 as (height, dest, ref, stride).
 * No CPU selector is passed, so the call is the same as in MC_put_8_3dnow.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
392 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
393 int stride, int height)
395 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_mmxext: forwards to MC_avg2_16 with offset = 1 and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+1.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
398 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
399 int stride, int height)
401 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_x8_mmxext: forwards to MC_avg2_8 with offset = 1 and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+1.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
404 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
405 int stride, int height)
407 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x16_mmxext: forwards to MC_put2_16 with offset = 1 and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+1.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
410 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
411 int stride, int height)
413 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x8_mmxext: forwards to MC_put2_8 with offset = 1 and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+1.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
416 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
417 int stride, int height)
419 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_y16_mmxext: forwards to MC_avg2_16 with offset = stride and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+stride.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
422 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
423 int stride, int height)
425 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_y8_mmxext: forwards to MC_avg2_8 with offset = stride and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+stride.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
428 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
429 int stride, int height)
431 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y16_mmxext: forwards to MC_put2_16 with offset = stride and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+stride.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
434 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
435 int stride, int height)
437 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y8_mmxext: forwards to MC_put2_8 with offset = stride and
 * cpu = CPU_MMXEXT, so the helper pairs each ref access with ref+stride.
 * Not listed in the ppppf_motion table built by motion_getfunctions. */
440 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
441 int stride, int height)
443 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_xy16_mmxext: forwards to MC_avg4_16 as
 * (height, dest, ref, stride, CPU_MMXEXT). Not listed in the ppppf_motion
 * table built by motion_getfunctions. */
446 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
447 int stride, int height)
449 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_xy8_mmxext: forwards to MC_avg4_8 as
 * (height, dest, ref, stride, CPU_MMXEXT). Not listed in the ppppf_motion
 * table built by motion_getfunctions. */
452 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
453 int stride, int height)
455 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy16_mmxext: forwards to MC_put4_16 as
 * (height, dest, ref, stride, CPU_MMXEXT). Not listed in the ppppf_motion
 * table built by motion_getfunctions. */
458 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
459 int stride, int height)
461 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy8_mmxext: forwards to MC_put4_8 as
 * (height, dest, ref, stride, CPU_MMXEXT). Not listed in the ppppf_motion
 * table built by motion_getfunctions. */
464 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
465 int stride, int height)
467 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_16_3dnow: (dest, ref, stride, height) entry point that forwards to
 * MC_avg1_16 as (height, dest, ref, stride, CPU_3DNOW). Listed in the
 * ppppf_motion table built by motion_getfunctions. */
471 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
472 int stride, int height)
474 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_avg_8_3dnow: (dest, ref, stride, height) entry point that forwards to
 * MC_avg1_8 as (height, dest, ref, stride, CPU_3DNOW). Listed in the
 * ppppf_motion table built by motion_getfunctions. */
477 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
478 int stride, int height)
480 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_16_3dnow: forwards to MC_put1_16 as (height, dest, ref, stride);
 * no CPU selector is passed. Listed in the ppppf_motion table built by
 * motion_getfunctions. */
483 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
484 int stride, int height)
486 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_3dnow: forwards to MC_put1_8 as (height, dest, ref, stride);
 * no CPU selector is passed. Listed in the ppppf_motion table built by
 * motion_getfunctions. */
489 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
490 int stride, int height)
492 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_3dnow: forwards to MC_avg2_16 with offset = 1 and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+1.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
495 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
496 int stride, int height)
498 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_x8_3dnow: forwards to MC_avg2_8 with offset = 1 and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+1.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
501 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
502 int stride, int height)
504 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x16_3dnow: forwards to MC_put2_16 with offset = 1 and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+1.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
507 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
508 int stride, int height)
510 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x8_3dnow: forwards to MC_put2_8 with offset = 1 and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+1.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
513 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
514 int stride, int height)
516 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_y16_3dnow: forwards to MC_avg2_16 with offset = stride and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+stride.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
519 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
520 int stride, int height)
522 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_avg_y8_3dnow: forwards to MC_avg2_8 with offset = stride and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+stride.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
525 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
526 int stride, int height)
528 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y16_3dnow: forwards to MC_put2_16 with offset = stride and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+stride.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
531 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
532 int stride, int height)
534 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y8_3dnow: forwards to MC_put2_8 with offset = stride and
 * cpu = CPU_3DNOW, so the helper pairs each ref access with ref+stride.
 * Listed in the ppppf_motion table built by motion_getfunctions. */
537 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
538 int stride, int height)
540 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_avg_xy16_3dnow: forwards to MC_avg4_16 as
 * (height, dest, ref, stride, CPU_3DNOW). Listed in the ppppf_motion table
 * built by motion_getfunctions. */
543 static void MC_avg_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
544 int stride, int height)
546 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_avg_xy8_3dnow: forwards to MC_avg4_8 as
 * (height, dest, ref, stride, CPU_3DNOW). Listed in the ppppf_motion table
 * built by motion_getfunctions. */
549 static void MC_avg_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
550 int stride, int height)
552 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_xy16_3dnow: forwards to MC_put4_16 as
 * (height, dest, ref, stride, CPU_3DNOW). Listed in the ppppf_motion table
 * built by motion_getfunctions. */
555 static void MC_put_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
556 int stride, int height)
558 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_xy8_3dnow: forwards to MC_put4_8 as
 * (height, dest, ref, stride, CPU_3DNOW). Listed in the ppppf_motion table
 * built by motion_getfunctions. */
561 static void MC_put_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
562 int stride, int height)
564 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
567 /*****************************************************************************
568 * Functions exported as capabilities. They are declared as static so that
569 * we don't pollute the namespace too much.
570 *****************************************************************************/
/*
 * motion_getfunctions: fill in the motion part of a module function list.
 * Sets pf_probe to motion_Probe and copies a static 2x2x4 table of routines
 * into functions.motion.ppppf_motion. Initializer order: the copying (put)
 * group comes first and the averaging (avg) group second; within each group
 * the *_16 entries precede the *_8 entries; within each row the order is
 * plain, x, y, xy.
 * Only the _3dnow entry points are listed; the _mmxext wrappers defined
 * earlier in this file are not referenced by this table.
 */
571 static void motion_getfunctions( function_list_t * p_function_list )
573 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
577 /* Copying functions */
580 MC_put_16_3dnow, MC_put_x16_3dnow, MC_put_y16_3dnow, MC_put_xy16_3dnow
584 MC_put_8_3dnow, MC_put_x8_3dnow, MC_put_y8_3dnow, MC_put_xy8_3dnow
588 /* Averaging functions */
591 MC_avg_16_3dnow, MC_avg_x16_3dnow, MC_avg_y16_3dnow, MC_avg_xy16_3dnow
595 MC_avg_8_3dnow, MC_avg_x8_3dnow, MC_avg_y8_3dnow, MC_avg_xy8_3dnow
600 p_function_list->pf_probe = motion_Probe;
/* `list` is a local shorthand for the destination structure. */
602 #define list p_function_list->functions.motion
/* NOTE(review): the byte count is written as sizeof( void * ) * 16, which
 * assumes each of the 2*2*4 = 16 function pointers is the size of a void *.
 * sizeof( ppppf_motion ) would express the same intent without that
 * assumption -- confirm the destination's declared type before changing. */
603 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );