1 /*****************************************************************************
2 * motion3dnow.c : 3D Now! motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motion3dnow.c,v 1.10 2002/05/18 17:47:47 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
31 #include <videolan/vlc.h>
35 /*****************************************************************************
36 * Local and extern prototypes.
37 *****************************************************************************/
/* Forward declaration; the definition near the end of this file copies the
 * motion function table into the list it is given. */
38 static void motion_getfunctions( function_list_t * p_function_list );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/*
 * Module description entries.  The macros used here are supplied by the vlc
 * module headers, which are not visible in this file, so the per-line notes
 * are assumptions to be confirmed.
 */
/* Human-readable description; _() is presumably the translation hook. */
47 SET_DESCRIPTION( _("3D Now! motion compensation module") )
/* Declares the MOTION capability; 150 is presumably its score -- TODO confirm. */
48 ADD_CAPABILITY( MOTION, 150 )
/* Presumably limits the module to CPUs that report 3DNOW -- TODO confirm. */
49 ADD_REQUIREMENT( 3DNOW )
/* Presumably alternative names under which the module can be requested. */
51 ADD_SHORTCUT( "3dnow" )
52 ADD_SHORTCUT( "motion3dn" )
/* Hand the module's motion function list to motion_getfunctions(), which
 * fills it with this file's function table. */
56 motion_getfunctions( &p_module->p_functions->motion );
/* No statements appear between the deactivate markers in this view. */
59 MODULE_DEACTIVATE_START
60 MODULE_DEACTIVATE_STOP
62 /*****************************************************************************
63 * Motion compensation in 3D Now! (OK I know this does MMXEXT too and it's ugly)
64 *****************************************************************************/
70 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/*
 * pavg_r2r(src, dest): register-to-register packed average.
 *
 * Dispatches on a variable named `cpu` that must be in scope at the point of
 * use: pavgb_r2r when cpu == CPU_MMXEXT, pavgusb_r2r otherwise.  Exactly one
 * of the two is issued per expansion.
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves as a
 * single statement, including when used as the branch of an if/else.
 */
#define pavg_r2r(src,dest)              \
do {                                    \
    if (cpu == CPU_MMXEXT)              \
        pavgb_r2r (src, dest);          \
    else                                \
        pavgusb_r2r (src, dest);        \
} while (0)
/*
 * pavg_m2r(src, dest): memory-to-register packed average.
 *
 * Dispatches on a variable named `cpu` that must be in scope at the point of
 * use: pavgb_m2r when cpu == CPU_MMXEXT, pavgusb_m2r otherwise.  Exactly one
 * of the two is issued per expansion.
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves as a
 * single statement, including when used as the branch of an if/else.
 */
#define pavg_m2r(src,dest)              \
do {                                    \
    if (cpu == CPU_MMXEXT)              \
        pavgb_m2r (src, dest);          \
    else                                \
        pavgusb_m2r (src, dest);        \
} while (0)
/*
 * MC_put1_8: store helper behind MC_put_8_mmxext and MC_put_8_3dnow, which
 * call it as (height, dest, ref, stride).
 * NOTE(review): only the first line of the parameter list and one statement
 * are visible here; braces and any per-row loop are not shown.
 */
92 static inline void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* Write register mm0 out to *dest (movq_r2m comes from the MMX macro header,
 * not shown -- presumably an 8-byte store). */
97 movq_r2m (mm0, *dest);
/*
 * MC_put1_16: moves data from ref to dest through mm0/mm1, at offsets 0 and
 * +8.  Called by MC_put_16_mmxext and MC_put_16_3dnow as
 * (height, dest, ref, stride).
 * NOTE(review): the tail of the parameter list, the braces and any per-row
 * loop are not visible in this view.
 */
103 static inline void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* Load from ref and ref+8 into mm0 / mm1. */
107 movq_m2r (*ref, mm0);
108 movq_m2r (*(ref+8), mm1);
/* Store both registers at the matching offsets in dest. */
110 movq_r2m (mm0, *dest);
111 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg1_8: loads *ref, averages it with *dest and writes the result back
 * to *dest.  Called by MC_avg_8_mmxext / MC_avg_8_3dnow as
 * (height, dest, ref, stride, cpu).
 * NOTE(review): the tail of the parameter list, the braces and any per-row
 * loop are not visible in this view.
 */
116 static inline void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
120 movq_m2r (*ref, mm0);
/* pavg_m2r selects pavgb or pavgusb depending on `cpu`. */
121 pavg_m2r (*dest, mm0);
123 movq_r2m (mm0, *dest);
/*
 * MC_avg1_16: same visible pattern as the 8-wide averaging helper, applied
 * at offsets 0 and +8: load from ref, average with dest, store to dest.
 * Called by MC_avg_16_mmxext / MC_avg_16_3dnow as
 * (height, dest, ref, stride, cpu).
 * NOTE(review): the tail of the parameter list, the braces and any per-row
 * loop are not visible in this view.
 */
128 static inline void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* Load both halves of the reference data. */
132 movq_m2r (*ref, mm0);
133 movq_m2r (*(ref+8), mm1);
/* Average each half with what is already in dest (instruction chosen by `cpu`). */
134 pavg_m2r (*dest, mm0);
135 pavg_m2r (*(dest+8), mm1);
/* Write both halves back. */
136 movq_r2m (mm0, *dest);
138 movq_r2m (mm1, *(dest+8));
/*
 * MC_put2_8: averages the data at ref with the data at ref+offset and stores
 * the result to dest.  The wrappers in this file pass offset = 1 (x variants)
 * or offset = stride (y variants); `cpu` selects the averaging instruction
 * inside pavg_m2r.
 * NOTE(review): braces and any per-row loop are not visible in this view.
 */
143 static inline void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
144 int stride, int offset, int cpu)
147 movq_m2r (*ref, mm0);
/* Blend with the neighbour selected by `offset`. */
148 pavg_m2r (*(ref+offset), mm0);
150 movq_r2m (mm0, *dest);
/*
 * MC_put2_16: for offsets 0 and +8, averages the data at ref with the data at
 * ref+offset and stores the result to dest.  The wrappers in this file pass
 * offset = 1 (x variants) or offset = stride (y variants); `cpu` selects the
 * averaging instruction inside pavg_m2r.
 * NOTE(review): braces and any per-row loop are not visible in this view.
 */
155 static inline void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
156 int stride, int offset, int cpu)
/* Load both halves of the reference data. */
159 movq_m2r (*ref, mm0);
160 movq_m2r (*(ref+8), mm1);
/* Blend each half with the neighbour selected by `offset`. */
161 pavg_m2r (*(ref+offset), mm0);
162 pavg_m2r (*(ref+offset+8), mm1);
/* Store both halves. */
163 movq_r2m (mm0, *dest);
165 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg2_8: averages the data at ref with the data at ref+offset, averages
 * that result with what is already in dest, and writes it back to dest.
 * The wrappers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants); `cpu` selects the averaging instruction inside pavg_m2r.
 * NOTE(review): braces and any per-row loop are not visible in this view.
 */
170 static inline void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
171 int stride, int offset, int cpu)
174 movq_m2r (*ref, mm0);
/* First blend: reference with its neighbour at `offset`. */
175 pavg_m2r (*(ref+offset), mm0);
/* Second blend: with the current contents of dest. */
176 pavg_m2r (*dest, mm0);
178 movq_r2m (mm0, *dest);
/*
 * MC_avg2_16: for offsets 0 and +8, averages the data at ref with the data at
 * ref+offset, averages that result with what is already in dest, and writes
 * it back to dest.  The wrappers in this file pass offset = 1 (x variants) or
 * offset = stride (y variants); `cpu` selects the averaging instruction
 * inside pavg_m2r.
 * NOTE(review): braces and any per-row loop are not visible in this view.
 */
183 static inline void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
184 int stride, int offset, int cpu)
/* Load both halves of the reference data. */
187 movq_m2r (*ref, mm0);
188 movq_m2r (*(ref+8), mm1);
/* First blend: reference with its neighbour at `offset`. */
189 pavg_m2r (*(ref+offset), mm0);
190 pavg_m2r (*(ref+offset+8), mm1);
/* Second blend: with the current contents of dest. */
191 pavg_m2r (*dest, mm0);
192 pavg_m2r (*(dest+8), mm1);
/* Write both halves back. */
194 movq_r2m (mm0, *dest);
195 movq_r2m (mm1, *(dest+8));
/* Constant with every byte set to 0x01.  The four-sample helpers below AND it
 * into mm7 (pand_m2r) and then subtract mm7 from mm0 with psubusb before
 * storing the result. */
200 static mmx_t mask_one = {0x0101010101010101LL};
/*
 * MC_put4_8: helper behind MC_put_xy8_mmxext / MC_put_xy8_3dnow, called as
 * (height, dest, ref, stride, cpu).
 * NOTE(review): many lines of this function are not visible here (tail of
 * the parameter list, braces, loop, and the operations that combine the
 * registers), so the comments only describe the statements shown.  The
 * mask_one / psubusb step looks like a rounding correction for nested
 * averages -- TODO confirm against the full source.
 */
202 static inline void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* Load the data at ref and ref+1. */
205 movq_m2r (*ref, mm0);
206 movq_m2r (*(ref+1), mm1);
/* Load ref and ref+1 again into mm2 / mm3 (presumably after ref has been
 * advanced by code not shown -- TODO confirm). */
213 movq_m2r (*ref, mm2);
216 movq_m2r (*(ref+1), mm3);
/* Keep only the low bit of each byte of mm7 ... */
228 pand_m2r (mask_one, mm7);
/* ... and subtract it (unsigned, saturating) from the result in mm0. */
230 psubusb_r2r (mm7, mm0);
233 movq_r2m (mm0, *dest);
/* Copy mm6 -> mm7 and mm2 -> mm0 for reuse. */
236 movq_r2r (mm6, mm7); // unroll !
237 movq_r2r (mm2, mm0); // unroll !
/*
 * MC_put4_16: helper behind MC_put_xy16_mmxext / MC_put_xy16_3dnow, called as
 * (height, dest, ref, stride, cpu).  For each half (offsets 0 and +8) it
 * reads the four neighbours ref, ref+1, ref+stride and ref+stride+1 and
 * stores one result to dest.
 * NOTE(review): the operations that combine the four registers, the tail of
 * the parameter list, the braces and any loop are not visible in this view.
 * The mask_one / psubusb step looks like a rounding correction for nested
 * averages -- TODO confirm against the full source.
 */
241 static inline void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: load the four neighbours. */
245 movq_m2r (*ref, mm0);
246 movq_m2r (*(ref+stride+1), mm1);
248 movq_m2r (*(ref+1), mm2);
250 movq_m2r (*(ref+stride), mm3);
/* Keep the low bit of each byte of mm7 and subtract it from mm0. */
259 pand_m2r (mask_one, mm7);
261 psubusb_r2r (mm7, mm0);
262 movq_r2m (mm0, *dest);
/* Second half: same sequence at offset +8. */
264 movq_m2r (*(ref+8), mm0);
265 movq_m2r (*(ref+stride+9), mm1);
267 movq_m2r (*(ref+9), mm2);
269 movq_m2r (*(ref+stride+8), mm3);
278 pand_m2r (mask_one, mm7);
280 psubusb_r2r (mm7, mm0);
282 movq_r2m (mm0, *(dest+8));
/*
 * MC_avg4_8: helper behind MC_avg_xy8_mmxext / MC_avg_xy8_3dnow, called as
 * (height, dest, ref, stride, cpu).  Reads the four neighbours ref, ref+1,
 * ref+stride and ref+stride+1, also reads the current dest, and stores one
 * result to dest.
 * NOTE(review): the operations that combine the registers (including how the
 * loaded dest value is used), the tail of the parameter list, the braces and
 * any loop are not visible in this view.  The mask_one / psubusb step looks
 * like a rounding correction for nested averages -- TODO confirm.
 */
287 static inline void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* Load the four neighbours. */
291 movq_m2r (*ref, mm0);
292 movq_m2r (*(ref+stride+1), mm1);
294 movq_m2r (*(ref+1), mm2);
296 movq_m2r (*(ref+stride), mm3);
/* Keep the low bit of each byte of mm7 and subtract it from mm0. */
305 pand_m2r (mask_one, mm7);
307 psubusb_r2r (mm7, mm0);
/* Fetch the existing destination data into mm1. */
308 movq_m2r (*dest, mm1);
311 movq_r2m (mm0, *dest);
/*
 * MC_avg4_16: helper behind MC_avg_xy16_mmxext / MC_avg_xy16_3dnow, called as
 * (height, dest, ref, stride, cpu).  For each half (offsets 0 and +8) it
 * reads the four neighbours ref, ref+1, ref+stride and ref+stride+1, also
 * reads the current dest, and stores one result to dest.
 * NOTE(review): the operations that combine the registers (including how the
 * loaded dest value is used), the tail of the parameter list, the braces and
 * any loop are not visible in this view.  The mask_one / psubusb step looks
 * like a rounding correction for nested averages -- TODO confirm.
 */
316 static inline void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: load the four neighbours. */
320 movq_m2r (*ref, mm0);
321 movq_m2r (*(ref+stride+1), mm1);
323 movq_m2r (*(ref+1), mm2);
325 movq_m2r (*(ref+stride), mm3);
/* Keep the low bit of each byte of mm7 and subtract it from mm0. */
334 pand_m2r (mask_one, mm7);
336 psubusb_r2r (mm7, mm0);
/* Fetch the existing destination data into mm1. */
337 movq_m2r (*dest, mm1);
339 movq_r2m (mm0, *dest);
/* Second half: same sequence at offset +8. */
341 movq_m2r (*(ref+8), mm0);
342 movq_m2r (*(ref+stride+9), mm1);
344 movq_m2r (*(ref+9), mm2);
346 movq_m2r (*(ref+stride+8), mm3);
355 pand_m2r (mask_one, mm7);
357 psubusb_r2r (mm7, mm0);
358 movq_m2r (*(dest+8), mm1);
361 movq_r2m (mm0, *(dest+8));
366 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
367 int stride, int height)
369 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
372 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
373 int stride, int height)
375 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
378 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
379 int stride, int height)
381 MC_put1_16 (height, dest, ref, stride);
384 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
385 int stride, int height)
387 MC_put1_8 (height, dest, ref, stride);
390 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
391 int stride, int height)
393 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
396 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
397 int stride, int height)
399 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
402 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
403 int stride, int height)
405 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
408 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
409 int stride, int height)
411 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
414 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
415 int stride, int height)
417 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
420 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
421 int stride, int height)
423 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
426 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
427 int stride, int height)
429 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
432 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
433 int stride, int height)
435 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
438 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
439 int stride, int height)
441 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
444 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
445 int stride, int height)
447 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
450 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
451 int stride, int height)
453 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
456 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
457 int stride, int height)
459 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
463 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
464 int stride, int height)
466 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
469 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
470 int stride, int height)
472 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
475 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
476 int stride, int height)
478 MC_put1_16 (height, dest, ref, stride);
481 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
482 int stride, int height)
484 MC_put1_8 (height, dest, ref, stride);
487 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
488 int stride, int height)
490 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
493 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
494 int stride, int height)
496 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
499 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
500 int stride, int height)
502 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
505 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
506 int stride, int height)
508 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
511 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
512 int stride, int height)
514 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
517 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
518 int stride, int height)
520 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
523 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
524 int stride, int height)
526 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
529 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
530 int stride, int height)
532 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
535 static void MC_avg_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
536 int stride, int height)
538 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
541 static void MC_avg_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
542 int stride, int height)
544 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
547 static void MC_put_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
548 int stride, int height)
550 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
553 static void MC_put_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
554 int stride, int height)
556 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
559 /*****************************************************************************
560 * Functions exported as capabilities. They are declared as static so that
561 * we don't pollute the namespace too much.
562 *****************************************************************************/
/*
 * motion_getfunctions: copies this module's table of motion compensation
 * entry points into p_function_list->functions.motion.ppppf_motion.
 *
 * The table is [2][2][4] = 16 function pointers.  Judging from the rows shown
 * below, the first index separates copying ("put") from averaging ("avg")
 * functions, the second separates the 16-wide from the 8-wide variants, and
 * the third runs over the plain, x, y and xy variants.
 *
 * Only the *_3dnow entry points appear in the rows visible here; the
 * *_mmxext wrappers defined earlier in the file are not referenced.
 *
 * NOTE(review): the end of this function lies beyond the visible part of the
 * file, and some lines of the initializer are not shown.
 */
563 static void motion_getfunctions( function_list_t * p_function_list )
565 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
569 /* Copying functions */
572 MC_put_16_3dnow, MC_put_x16_3dnow, MC_put_y16_3dnow, MC_put_xy16_3dnow
576 MC_put_8_3dnow, MC_put_x8_3dnow, MC_put_y8_3dnow, MC_put_xy8_3dnow
580 /* Averaging functions */
583 MC_avg_16_3dnow, MC_avg_x16_3dnow, MC_avg_y16_3dnow, MC_avg_xy16_3dnow
587 MC_avg_8_3dnow, MC_avg_x8_3dnow, MC_avg_y8_3dnow, MC_avg_xy8_3dnow
/* Shorthand for the destination structure inside the function list. */
592 #define list p_function_list->functions.motion
/* NOTE(review): the byte count is spelled sizeof( void * ) * 16, which
 * assumes a function pointer has the size of a void * and hard-codes the
 * element count; sizeof( ppppf_motion ) would state the same size directly. */
593 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );