1 /*****************************************************************************
2 * motion3dnow.c : 3D Now! motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motion3dnow.c,v 1.11 2002/06/01 12:32:00 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
35 /*****************************************************************************
36 * Local and extern prototypes.
37 *****************************************************************************/
38 static void motion_getfunctions( function_list_t * p_function_list );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/* Human-readable module description; wrapped in _() for translation. */
47 SET_DESCRIPTION( _("3D Now! motion compensation module") )
/* Declares the MOTION capability with the value 150 (presumably a selection
 * score/priority -- confirm against the module macro definitions). */
48 ADD_CAPABILITY( MOTION, 150 )
/* Declares a 3DNOW requirement for this module. */
49 ADD_REQUIREMENT( 3DNOW )
/* Short name "3dnow" for this module. */
51 ADD_SHORTCUT( "3dnow" )
/* Activation step: fills the module's motion function list. */
55 motion_getfunctions( &p_module->p_functions->motion );
/* Nothing sits between the deactivate START and STOP markers. */
58 MODULE_DEACTIVATE_START
59 MODULE_DEACTIVATE_STOP
61 /*****************************************************************************
62 * Motion compensation in 3D Now! (this file also implements the MMXEXT variants, which is admittedly ugly)
63 *****************************************************************************/
69 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/*
 * pavg_r2r(src, dest): register-to-register packed average, chosen by CPU.
 * Uses pavgb_r2r when cpu == CPU_MMXEXT; pavgusb_r2r is the alternative
 * (presumably the else branch -- that line is not visible in this listing).
 * The macro reads a variable named `cpu` that must be in scope wherever it is
 * expanded; it is not one of the macro's parameters.
 */
71 #define pavg_r2r(src,dest) \
73 if (cpu == CPU_MMXEXT) \
74 pavgb_r2r (src, dest); \
76 pavgusb_r2r (src, dest); \
/*
 * pavg_m2r(src, dest): memory-to-register packed average, chosen by CPU.
 * Uses pavgb_m2r when cpu == CPU_MMXEXT; pavgusb_m2r is the alternative
 * (presumably the else branch -- that line is not visible in this listing).
 * Like pavg_r2r, it depends on a `cpu` variable being in scope at the
 * expansion site.
 */
79 #define pavg_m2r(src,dest) \
81 if (cpu == CPU_MMXEXT) \
82 pavgb_m2r (src, dest); \
84 pavgusb_m2r (src, dest); \
/*
 * MC_put1_8: only the store is visible in this listing -- mm0 is written to
 * *dest with movq_r2m. The load into mm0 and any per-row loop are on lines
 * not shown here.
 * Called as MC_put1_8 (height, dest, ref, stride) by MC_put_8_mmxext and
 * MC_put_8_3dnow; unlike the averaging helpers it is passed no cpu argument.
 */
91 static inline void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
96 movq_r2m (mm0, *dest);
/*
 * MC_put1_16: plain copy of two groups per step. Loads *ref into mm0 and
 * *(ref+8) into mm1, then stores them to *dest and *(dest+8).
 * Presumably yuv_data_t is one byte, making the two groups adjacent 8-byte
 * halves of a 16-wide row -- confirm. Loop and pointer advance lines are not
 * visible in this listing.
 * Called with (height, dest, ref, stride) and no cpu argument.
 */
102 static inline void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
106 movq_m2r (*ref, mm0);
107 movq_m2r (*(ref+8), mm1);
109 movq_r2m (mm0, *dest);
110 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg1_8: averages the reference with what is already in the destination.
 * Loads *ref into mm0, averages mm0 with *dest via pavg_m2r, and stores the
 * result back to *dest.
 * pavg_m2r needs a `cpu` variable; callers pass CPU_MMXEXT or CPU_3DNOW as
 * the fifth argument (the tail of the parameter list is not visible here).
 */
115 static inline void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
119 movq_m2r (*ref, mm0);
120 pavg_m2r (*dest, mm0);
122 movq_r2m (mm0, *dest);
/*
 * MC_avg1_16: same as MC_avg1_8 but on two groups per step.
 * Loads *ref and *(ref+8) into mm0/mm1, averages each with *dest and
 * *(dest+8) via pavg_m2r, and stores both results back to the destination.
 * Callers pass the cpu selector as the fifth argument (the tail of the
 * parameter list is not visible in this listing).
 */
127 static inline void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
131 movq_m2r (*ref, mm0);
132 movq_m2r (*(ref+8), mm1);
133 pavg_m2r (*dest, mm0);
134 pavg_m2r (*(dest+8), mm1);
135 movq_r2m (mm0, *dest);
137 movq_r2m (mm1, *(dest+8));
/*
 * MC_put2_8: two-sample interpolation written straight to the destination.
 * Loads *ref into mm0, averages it with *(ref+offset) via pavg_m2r, and
 * stores the result to *dest.
 * offset picks the second sample: callers in this file pass 1 (the "x"
 * entry points) or stride (the "y" entry points).
 * cpu selects the averaging instruction inside pavg_m2r.
 */
142 static inline void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
143 int stride, int offset, int cpu)
146 movq_m2r (*ref, mm0);
147 pavg_m2r (*(ref+offset), mm0);
149 movq_r2m (mm0, *dest);
/*
 * MC_put2_16: MC_put2_8 applied to two groups per step.
 * Loads *ref and *(ref+8) into mm0/mm1, averages them with *(ref+offset)
 * and *(ref+offset+8) via pavg_m2r, and stores to *dest and *(dest+8).
 * offset is 1 or stride depending on the caller; cpu selects the averaging
 * instruction inside pavg_m2r.
 */
154 static inline void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
155 int stride, int offset, int cpu)
158 movq_m2r (*ref, mm0);
159 movq_m2r (*(ref+8), mm1);
160 pavg_m2r (*(ref+offset), mm0);
161 pavg_m2r (*(ref+offset+8), mm1);
162 movq_r2m (mm0, *dest);
164 movq_r2m (mm1, *(dest+8));
/*
 * MC_avg2_8: two-sample interpolation followed by an average with the
 * existing destination.
 * Loads *ref into mm0, averages with *(ref+offset), averages that result
 * with *dest (two successive pavg_m2r steps), then stores back to *dest.
 * offset is 1 or stride depending on the caller; cpu selects the averaging
 * instruction inside pavg_m2r.
 */
169 static inline void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
170 int stride, int offset, int cpu)
173 movq_m2r (*ref, mm0);
174 pavg_m2r (*(ref+offset), mm0);
175 pavg_m2r (*dest, mm0);
177 movq_r2m (mm0, *dest);
/*
 * MC_avg2_16: MC_avg2_8 applied to two groups per step.
 * mm0/mm1 take *ref and *(ref+8), are averaged with *(ref+offset) and
 * *(ref+offset+8), then with *dest and *(dest+8), and are finally stored
 * back to *dest and *(dest+8).
 * offset is 1 or stride depending on the caller; cpu selects the averaging
 * instruction inside pavg_m2r.
 */
182 static inline void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
183 int stride, int offset, int cpu)
186 movq_m2r (*ref, mm0);
187 movq_m2r (*(ref+8), mm1);
188 pavg_m2r (*(ref+offset), mm0);
189 pavg_m2r (*(ref+offset+8), mm1);
190 pavg_m2r (*dest, mm0);
191 pavg_m2r (*(dest+8), mm1);
193 movq_r2m (mm0, *dest);
194 movq_r2m (mm1, *(dest+8));
/*
 * 64-bit constant with the value 0x01 in each of its eight bytes.
 * The four-sample helpers (MC_put4_* / MC_avg4_*) AND it into mm7 with
 * pand_m2r just before subtracting mm7 from mm0 with psubusb_r2r
 * (presumably a per-byte rounding correction -- confirm against the
 * arithmetic in those helpers).
 */
199 static mmx_t mask_one = {0x0101010101010101LL};
/*
 * MC_put4_8: four-sample ("xy") interpolation written to the destination.
 * Visible steps: *ref and *(ref+1) are loaded into mm0/mm1, then *ref and
 * *(ref+1) are loaded again into mm2/mm3 (presumably after ref has advanced
 * to the next row -- the advancing line is not visible in this listing).
 * mm7 is masked with mask_one, subtracted from mm0 with psubusb_r2r, and mm0
 * is stored to *dest. The two trailing register copies (mm6->mm7, mm2->mm0)
 * are marked "unroll" and appear to carry state into the next iteration.
 * The arithmetic between the loads and the mask step is not shown here.
 */
201 static inline void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
204 movq_m2r (*ref, mm0);
205 movq_m2r (*(ref+1), mm1);
212 movq_m2r (*ref, mm2);
215 movq_m2r (*(ref+1), mm3);
227 pand_m2r (mask_one, mm7);
229 psubusb_r2r (mm7, mm0);
232 movq_r2m (mm0, *dest);
235 movq_r2r (mm6, mm7); // unroll !
236 movq_r2r (mm2, mm0); // unroll !
/*
 * MC_put4_16: four-sample ("xy") interpolation on two groups per step.
 * First group: loads *ref, *(ref+stride+1), *(ref+1) and *(ref+stride)
 * into mm0..mm3, masks mm7 with mask_one, subtracts it from mm0 with
 * psubusb_r2r and stores mm0 to *dest.
 * Second group: the same sequence at ref+8 / ref+9, stored to *(dest+8).
 * The arithmetic that combines the four loads is on lines not visible in
 * this listing.
 */
240 static inline void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
244 movq_m2r (*ref, mm0);
245 movq_m2r (*(ref+stride+1), mm1);
247 movq_m2r (*(ref+1), mm2);
249 movq_m2r (*(ref+stride), mm3);
258 pand_m2r (mask_one, mm7);
260 psubusb_r2r (mm7, mm0);
261 movq_r2m (mm0, *dest);
263 movq_m2r (*(ref+8), mm0);
264 movq_m2r (*(ref+stride+9), mm1);
266 movq_m2r (*(ref+9), mm2);
268 movq_m2r (*(ref+stride+8), mm3);
277 pand_m2r (mask_one, mm7);
279 psubusb_r2r (mm7, mm0);
281 movq_r2m (mm0, *(dest+8));
/*
 * MC_avg4_8: four-sample ("xy") interpolation combined with the existing
 * destination.
 * Loads *ref, *(ref+stride+1), *(ref+1) and *(ref+stride) into mm0..mm3,
 * masks mm7 with mask_one and subtracts it from mm0 with psubusb_r2r.
 * *dest is then loaded into mm1 and mm0 is stored back to *dest; the step
 * that merges mm1 into mm0 is on a line not visible in this listing.
 */
286 static inline void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
290 movq_m2r (*ref, mm0);
291 movq_m2r (*(ref+stride+1), mm1);
293 movq_m2r (*(ref+1), mm2);
295 movq_m2r (*(ref+stride), mm3);
304 pand_m2r (mask_one, mm7);
306 psubusb_r2r (mm7, mm0);
307 movq_m2r (*dest, mm1);
310 movq_r2m (mm0, *dest);
/*
 * MC_avg4_16: MC_avg4_8 applied to two groups per step.
 * First group: loads the four neighbours *ref, *(ref+stride+1), *(ref+1),
 * *(ref+stride); masks mm7 with mask_one; subtracts it from mm0; loads
 * *dest into mm1; stores mm0 to *dest.
 * Second group: the same sequence at ref+8 / ref+9 against *(dest+8).
 * The combining arithmetic and the merge with the loaded destination value
 * are on lines not visible in this listing.
 */
315 static inline void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
319 movq_m2r (*ref, mm0);
320 movq_m2r (*(ref+stride+1), mm1);
322 movq_m2r (*(ref+1), mm2);
324 movq_m2r (*(ref+stride), mm3);
333 pand_m2r (mask_one, mm7);
335 psubusb_r2r (mm7, mm0);
336 movq_m2r (*dest, mm1);
338 movq_r2m (mm0, *dest);
340 movq_m2r (*(ref+8), mm0);
341 movq_m2r (*(ref+stride+9), mm1);
343 movq_m2r (*(ref+9), mm2);
345 movq_m2r (*(ref+stride+8), mm3);
354 pand_m2r (mask_one, mm7);
356 psubusb_r2r (mm7, mm0);
357 movq_m2r (*(dest+8), mm1);
360 movq_r2m (mm0, *(dest+8));
/*
 * MMXEXT entry point: averages ref into dest by forwarding to MC_avg1_16
 * with cpu = CPU_MMXEXT. Note the argument reordering (height goes first).
 * NOTE(review): none of the *_mmxext wrappers is referenced by the dispatch
 * table lines visible in this listing -- confirm whether they are used.
 */
365 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
366 int stride, int height)
368 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg1_8 with cpu = CPU_MMXEXT. */
371 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
372 int stride, int height)
374 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_put1_16. No cpu selector is passed, so
 * this call is identical to the one made by MC_put_16_3dnow.
 */
377 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
378 int stride, int height)
380 MC_put1_16 (height, dest, ref, stride);
/*
 * MMXEXT entry point: forwards to MC_put1_8. No cpu selector is passed, so
 * this call is identical to the one made by MC_put_8_3dnow.
 */
383 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
384 int stride, int height)
386 MC_put1_8 (height, dest, ref, stride);
/*
 * MMXEXT entry point: forwards to MC_avg2_16 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_MMXEXT.
 */
389 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
390 int stride, int height)
392 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_avg2_8 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_MMXEXT.
 */
395 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
396 int stride, int height)
398 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_put2_16 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_MMXEXT.
 */
401 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
402 int stride, int height)
404 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_put2_8 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_MMXEXT.
 */
407 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
408 int stride, int height)
410 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_avg2_16 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_MMXEXT.
 */
413 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
414 int stride, int height)
416 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_avg2_8 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_MMXEXT.
 */
419 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
420 int stride, int height)
422 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_put2_16 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_MMXEXT.
 */
425 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
426 int stride, int height)
428 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/*
 * MMXEXT entry point: forwards to MC_put2_8 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_MMXEXT.
 */
431 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
432 int stride, int height)
434 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the four-sample helper MC_avg4_16 with
 * cpu = CPU_MMXEXT. */
437 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
438 int stride, int height)
440 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the four-sample helper MC_avg4_8 with
 * cpu = CPU_MMXEXT. */
443 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
444 int stride, int height)
446 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the four-sample helper MC_put4_16 with
 * cpu = CPU_MMXEXT. */
449 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
450 int stride, int height)
452 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the four-sample helper MC_put4_8 with
 * cpu = CPU_MMXEXT. */
455 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
456 int stride, int height)
458 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/*
 * 3D Now! entry point: averages ref into dest by forwarding to MC_avg1_16
 * with cpu = CPU_3DNOW. Note the argument reordering (height goes first).
 */
462 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
463 int stride, int height)
465 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_avg1_8 with cpu = CPU_3DNOW. */
468 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
469 int stride, int height)
471 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_put1_16. No cpu selector is passed, so
 * this call is identical to the one made by MC_put_16_mmxext.
 */
474 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
475 int stride, int height)
477 MC_put1_16 (height, dest, ref, stride);
/*
 * 3D Now! entry point: forwards to MC_put1_8. No cpu selector is passed, so
 * this call is identical to the one made by MC_put_8_mmxext.
 */
480 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
481 int stride, int height)
483 MC_put1_8 (height, dest, ref, stride);
/*
 * 3D Now! entry point: forwards to MC_avg2_16 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_3DNOW.
 */
486 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
487 int stride, int height)
489 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_avg2_8 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_3DNOW.
 */
492 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
493 int stride, int height)
495 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_put2_16 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_3DNOW.
 */
498 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
499 int stride, int height)
501 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_put2_8 with offset = 1 (second sample
 * is ref+1) and cpu = CPU_3DNOW.
 */
504 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
505 int stride, int height)
507 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_avg2_16 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_3DNOW.
 */
510 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
511 int stride, int height)
513 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_avg2_8 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_3DNOW.
 */
516 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
517 int stride, int height)
519 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_put2_16 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_3DNOW.
 */
522 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
523 int stride, int height)
525 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/*
 * 3D Now! entry point: forwards to MC_put2_8 with offset = stride (second
 * sample is ref+stride) and cpu = CPU_3DNOW.
 */
528 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
529 int stride, int height)
531 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the four-sample helper MC_avg4_16 with
 * cpu = CPU_3DNOW. */
534 static void MC_avg_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
535 int stride, int height)
537 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the four-sample helper MC_avg4_8 with
 * cpu = CPU_3DNOW. */
540 static void MC_avg_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
541 int stride, int height)
543 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the four-sample helper MC_put4_16 with
 * cpu = CPU_3DNOW. */
546 static void MC_put_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
547 int stride, int height)
549 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the four-sample helper MC_put4_8 with
 * cpu = CPU_3DNOW. */
552 static void MC_put_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
553 int stride, int height)
555 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
558 /*****************************************************************************
559 * Functions exported as capabilities. They are declared as static so that
560 * we don't pollute the namespace too much.
561 *****************************************************************************/
562 static void motion_getfunctions( function_list_t * p_function_list )
564 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
568 /* Copying functions */
571 MC_put_16_3dnow, MC_put_x16_3dnow, MC_put_y16_3dnow, MC_put_xy16_3dnow
575 MC_put_8_3dnow, MC_put_x8_3dnow, MC_put_y8_3dnow, MC_put_xy8_3dnow
579 /* Averaging functions */
582 MC_avg_16_3dnow, MC_avg_x16_3dnow, MC_avg_y16_3dnow, MC_avg_xy16_3dnow
586 MC_avg_8_3dnow, MC_avg_x8_3dnow, MC_avg_y8_3dnow, MC_avg_xy8_3dnow
591 #define list p_function_list->functions.motion
592 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );