1 /*****************************************************************************
2 * motionmmxext.c : MMX EXT motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionmmxext.c,v 1.19 2002/06/01 12:32:00 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
35 /*****************************************************************************
36 * Local and extern prototypes.
37 *****************************************************************************/
/* Forward declaration: the module configuration section below calls
 * motion_getfunctions() before its definition at the end of this file. */
38 static void motion_getfunctions( function_list_t * p_function_list );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/* Module registration entries. They invoke SET_DESCRIPTION with the module's
 * description string, ADD_CAPABILITY with (MOTION, 200), ADD_REQUIREMENT with
 * MMXEXT and ADD_SHORTCUT with "mmxext". The macros themselves, and the
 * start/stop macros that enclose these entries, are defined outside this view.
 * NOTE(review): 200 is presumably a selection score for the MOTION
 * capability -- confirm against the macro definitions. */
47 SET_DESCRIPTION( _("MMXEXT motion compensation module") )
48 ADD_CAPABILITY( MOTION, 200 )
49 ADD_REQUIREMENT( MMXEXT )
50 ADD_SHORTCUT( "mmxext" )
/* Pass the module's motion function list to motion_getfunctions(). */
54 motion_getfunctions( &p_module->p_functions->motion );
57 MODULE_DEACTIVATE_START
58 MODULE_DEACTIVATE_STOP
60 /*****************************************************************************
61 * Motion compensation in MMXEXT (OK I know this does 3DNow too and it's ugly)
62 *****************************************************************************/
68 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* pavg_r2r / pavg_m2r: choose the averaging primitive from the `cpu` variable
 * that must be in scope where the macro is expanded. pavgb_* is used when
 * cpu == CPU_MMXEXT; pavgusb_* is listed for the other case (the line that
 * separates the two branches is not visible in this view). Both macros take
 * their operands as (src, dest).
 * NOTE(review): pavgb_* and pavgusb_* are defined outside this view;
 * presumably they emit the PAVGB and PAVGUSB instructions -- confirm. */
70 #define pavg_r2r(src,dest) \
72 if (cpu == CPU_MMXEXT) \
73 pavgb_r2r (src, dest); \
75 pavgusb_r2r (src, dest); \
78 #define pavg_m2r(src,dest) \
80 if (cpu == CPU_MMXEXT) \
81 pavgb_m2r (src, dest); \
83 pavgusb_m2r (src, dest); \
/* MC_put1_8: copy helper; the callers in this file pass
 * (height, dest, ref, stride). Only the store of mm0 to *dest is visible;
 * the load of mm0, the loop and the pointer updates are outside this view.
 * NOTE(review): movq_r2m is assumed to use the (src, dest) operand order of
 * the pavg_* macros above -- confirm against the MMX macro header. */
90 static inline void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
95 movq_r2m (mm0, *dest);
/* MC_put1_16: copy helper; the callers in this file pass
 * (height, dest, ref, stride). The visible statements move *ref and *(ref+8)
 * through mm0 and mm1 to *dest and *(dest+8). The loop and pointer updates
 * are outside this view.
 * NOTE(review): operand order of movq_* assumed to be (src, dest) -- confirm. */
101 static inline void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
105 movq_m2r (*ref, mm0);
106 movq_m2r (*(ref+8), mm1);
108 movq_r2m (mm0, *dest);
109 movq_r2m (mm1, *(dest+8));
/* MC_avg1_8: averaging helper; the callers in this file pass
 * (height, dest, ref, stride, cpu). The visible statements load *ref into
 * mm0, combine it with *dest through pavg_m2r (which selects the instruction
 * from `cpu`), and write mm0 back to *dest. The loop and pointer updates are
 * outside this view. */
114 static inline void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
118 movq_m2r (*ref, mm0);
119 pavg_m2r (*dest, mm0);
121 movq_r2m (mm0, *dest);
/* MC_avg1_16: averaging helper; the callers in this file pass
 * (height, dest, ref, stride, cpu). The visible statements load *ref and
 * *(ref+8) into mm0 / mm1, combine each with the matching half of dest
 * through pavg_m2r, and write both registers back to *dest and *(dest+8).
 * The loop and pointer updates are outside this view. */
126 static inline void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
130 movq_m2r (*ref, mm0);
131 movq_m2r (*(ref+8), mm1);
132 pavg_m2r (*dest, mm0);
133 pavg_m2r (*(dest+8), mm1);
134 movq_r2m (mm0, *dest);
136 movq_r2m (mm1, *(dest+8));
/* MC_put2_8: two-sample helper. The visible statements load *ref into mm0,
 * combine it with *(ref+offset) through pavg_m2r and store the result to
 * *dest. Callers in this file pass offset = 1 (the *_x8_* wrappers) or
 * offset = stride (the *_y8_* wrappers); `cpu` selects the instruction inside
 * pavg_m2r. The loop and pointer updates are outside this view. */
141 static inline void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
142 int stride, int offset, int cpu)
145 movq_m2r (*ref, mm0);
146 pavg_m2r (*(ref+offset), mm0);
148 movq_r2m (mm0, *dest);
/* MC_put2_16: two-sample helper working on two halves at ref and ref+8.
 * Each half is loaded, combined with the sample `offset` elements further on
 * through pavg_m2r, and stored to *dest / *(dest+8). Callers in this file
 * pass offset = 1 (the *_x16_* wrappers) or offset = stride (the *_y16_*
 * wrappers). The loop and pointer updates are outside this view. */
153 static inline void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
154 int stride, int offset, int cpu)
157 movq_m2r (*ref, mm0);
158 movq_m2r (*(ref+8), mm1);
159 pavg_m2r (*(ref+offset), mm0);
160 pavg_m2r (*(ref+offset+8), mm1);
161 movq_r2m (mm0, *dest);
163 movq_r2m (mm1, *(dest+8));
/* MC_avg2_8: like MC_put2_8 in its visible part, with one extra step: after
 * combining *ref with *(ref+offset), the result is combined with the current
 * *dest through pavg_m2r before being written back to *dest. The loop and
 * pointer updates are outside this view. */
168 static inline void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
169 int stride, int offset, int cpu)
172 movq_m2r (*ref, mm0);
173 pavg_m2r (*(ref+offset), mm0);
174 pavg_m2r (*dest, mm0);
176 movq_r2m (mm0, *dest);
/* MC_avg2_16: two-sample helper on two halves (ref and ref+8). Each half is
 * combined with the sample `offset` elements further on, then with the
 * matching half of dest, and written back to *dest / *(dest+8). All three
 * combinations go through pavg_m2r, so `cpu` selects the instruction.
 * The loop and pointer updates are outside this view. */
181 static inline void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
182 int stride, int offset, int cpu)
185 movq_m2r (*ref, mm0);
186 movq_m2r (*(ref+8), mm1);
187 pavg_m2r (*(ref+offset), mm0);
188 pavg_m2r (*(ref+offset+8), mm1);
189 pavg_m2r (*dest, mm0);
190 pavg_m2r (*(dest+8), mm1);
192 movq_r2m (mm0, *dest);
193 movq_r2m (mm1, *(dest+8));
/* Constant whose eight bytes are each 0x01. The MC_*4_* helpers below apply
 * it to mm7 with pand_m2r. */
198 static mmx_t mask_one = {0x0101010101010101LL};
/* MC_put4_8: helper behind the MC_put_xy8_* wrapper, which passes
 * (height, dest, ref, stride, cpu). Most of the body is outside this view.
 * The visible statements load *ref and *(ref+1) into mm0 / mm1, later load
 * the same two expressions into mm2 / mm3, mask mm7 with mask_one, subtract
 * mm7 from mm0 with psubusb_r2r, store mm0 to *dest, and then copy mm6 to mm7
 * and mm2 to mm0.
 * NOTE(review): the second pair of loads presumably reads the next row after
 * ref has been advanced, and the mm7 subtraction looks like a rounding
 * correction -- neither is visible here; confirm against the full source. */
200 static inline void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
203 movq_m2r (*ref, mm0);
204 movq_m2r (*(ref+1), mm1);
211 movq_m2r (*ref, mm2);
214 movq_m2r (*(ref+1), mm3);
226 pand_m2r (mask_one, mm7);
228 psubusb_r2r (mm7, mm0);
231 movq_r2m (mm0, *dest);
234 movq_r2r (mm6, mm7); // unroll !
235 movq_r2r (mm2, mm0); // unroll !
/* MC_put4_16: helper behind the MC_put_xy16_* wrapper, which passes
 * (height, dest, ref, stride, cpu). Most of the arithmetic is outside this
 * view. The visible statements read the four positions ref, ref+1,
 * ref+stride and ref+stride+1, mask mm7 with mask_one, subtract mm7 from mm0
 * with psubusb_r2r and store mm0 to *dest. The same sequence is then repeated
 * 8 elements further on and stored to *(dest+8).
 * NOTE(review): the mm7 subtraction looks like a rounding correction for the
 * combined averages -- the instructions that produce mm7 are not visible;
 * confirm against the full source. */
239 static inline void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half: positions ref, ref+stride+1, ref+1, ref+stride. */
243 movq_m2r (*ref, mm0);
244 movq_m2r (*(ref+stride+1), mm1);
246 movq_m2r (*(ref+1), mm2);
248 movq_m2r (*(ref+stride), mm3);
257 pand_m2r (mask_one, mm7);
259 psubusb_r2r (mm7, mm0);
260 movq_r2m (mm0, *dest);
/* Second half: the same four positions, 8 elements further on. */
262 movq_m2r (*(ref+8), mm0);
263 movq_m2r (*(ref+stride+9), mm1);
265 movq_m2r (*(ref+9), mm2);
267 movq_m2r (*(ref+stride+8), mm3);
276 pand_m2r (mask_one, mm7);
278 psubusb_r2r (mm7, mm0);
280 movq_r2m (mm0, *(dest+8));
/* MC_avg4_8: helper behind the MC_avg_xy8_* wrapper, which passes
 * (height, dest, ref, stride, cpu). Most of the arithmetic is outside this
 * view. The visible statements read ref, ref+stride+1, ref+1 and ref+stride,
 * mask mm7 with mask_one, subtract mm7 from mm0 with psubusb_r2r, load the
 * current *dest into mm1 and finally store mm0 to *dest.
 * NOTE(review): the step that combines mm1 (old dest) into mm0 is not
 * visible here; presumably an average -- confirm against the full source. */
285 static inline void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
289 movq_m2r (*ref, mm0);
290 movq_m2r (*(ref+stride+1), mm1);
292 movq_m2r (*(ref+1), mm2);
294 movq_m2r (*(ref+stride), mm3);
303 pand_m2r (mask_one, mm7);
305 psubusb_r2r (mm7, mm0);
306 movq_m2r (*dest, mm1);
309 movq_r2m (mm0, *dest);
/* MC_avg4_16: helper behind the MC_avg_xy16_* wrapper, which passes
 * (height, dest, ref, stride, cpu). Most of the arithmetic is outside this
 * view. For each of the two halves the visible statements read the four
 * positions ref, ref+1, ref+stride and ref+stride+1 (offset by 8 for the
 * second half), mask mm7 with mask_one, subtract mm7 from mm0 with
 * psubusb_r2r, load the current dest half into mm1 and store mm0 back.
 * NOTE(review): the step that combines mm1 (old dest) into mm0 is not
 * visible here; presumably an average -- confirm against the full source. */
314 static inline void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
/* First half. */
318 movq_m2r (*ref, mm0);
319 movq_m2r (*(ref+stride+1), mm1);
321 movq_m2r (*(ref+1), mm2);
323 movq_m2r (*(ref+stride), mm3);
332 pand_m2r (mask_one, mm7);
334 psubusb_r2r (mm7, mm0);
335 movq_m2r (*dest, mm1);
337 movq_r2m (mm0, *dest);
/* Second half, 8 elements further on. */
339 movq_m2r (*(ref+8), mm0);
340 movq_m2r (*(ref+stride+9), mm1);
342 movq_m2r (*(ref+9), mm2);
344 movq_m2r (*(ref+stride+8), mm3);
353 pand_m2r (mask_one, mm7);
355 psubusb_r2r (mm7, mm0);
356 movq_m2r (*(dest+8), mm1);
359 movq_r2m (mm0, *(dest+8));
/* MC_avg_16_mmxext: function-table entry taking (dest, ref, stride, height);
 * forwards to MC_avg1_16 with CPU_MMXEXT as the cpu argument. */
364 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
365 int stride, int height)
367 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_8_mmxext: function-table entry taking (dest, ref, stride, height);
 * forwards to MC_avg1_8 with CPU_MMXEXT as the cpu argument. */
370 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
371 int stride, int height)
373 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_16_mmxext: function-table entry taking (dest, ref, stride, height);
 * forwards to MC_put1_16, which takes no cpu argument. */
376 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
377 int stride, int height)
379 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_mmxext: function-table entry taking (dest, ref, stride, height);
 * forwards to MC_put1_8, which takes no cpu argument. */
382 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
383 int stride, int height)
385 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_mmxext: function-table entry; forwards to MC_avg2_16 with
 * offset = 1 (the neighbouring element) and cpu = CPU_MMXEXT. */
388 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
389 int stride, int height)
391 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_x8_mmxext: function-table entry; forwards to MC_avg2_8 with
 * offset = 1 (the neighbouring element) and cpu = CPU_MMXEXT. */
394 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
395 int stride, int height)
397 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x16_mmxext: function-table entry; forwards to MC_put2_16 with
 * offset = 1 (the neighbouring element) and cpu = CPU_MMXEXT. */
400 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
401 int stride, int height)
403 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_put_x8_mmxext: function-table entry; forwards to MC_put2_8 with
 * offset = 1 (the neighbouring element) and cpu = CPU_MMXEXT. */
406 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
407 int stride, int height)
409 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MC_avg_y16_mmxext: function-table entry; forwards to MC_avg2_16 with
 * offset = stride and cpu = CPU_MMXEXT. */
412 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
413 int stride, int height)
415 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_y8_mmxext: function-table entry; forwards to MC_avg2_8 with
 * offset = stride and cpu = CPU_MMXEXT. */
418 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
419 int stride, int height)
421 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y16_mmxext: function-table entry; forwards to MC_put2_16 with
 * offset = stride and cpu = CPU_MMXEXT. */
424 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
425 int stride, int height)
427 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_put_y8_mmxext: function-table entry; forwards to MC_put2_8 with
 * offset = stride and cpu = CPU_MMXEXT. */
430 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
431 int stride, int height)
433 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MC_avg_xy16_mmxext: function-table entry; forwards to MC_avg4_16 with
 * cpu = CPU_MMXEXT. */
436 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
437 int stride, int height)
439 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_xy8_mmxext: function-table entry; forwards to MC_avg4_8 with
 * cpu = CPU_MMXEXT. */
442 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
443 int stride, int height)
445 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy16_mmxext: function-table entry; forwards to MC_put4_16 with
 * cpu = CPU_MMXEXT. */
448 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
449 int stride, int height)
451 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_put_xy8_mmxext: function-table entry; forwards to MC_put4_8 with
 * cpu = CPU_MMXEXT. */
454 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
455 int stride, int height)
457 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MC_avg_16_3dnow: takes (dest, ref, stride, height); forwards to MC_avg1_16
 * with CPU_3DNOW as the cpu argument. */
461 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
462 int stride, int height)
464 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* MC_avg_8_3dnow: takes (dest, ref, stride, height); forwards to MC_avg1_8
 * with CPU_3DNOW as the cpu argument. */
467 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
468 int stride, int height)
470 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* MC_put_16_3dnow: takes (dest, ref, stride, height); forwards to MC_put1_16.
 * The call is identical to the one in MC_put_16_mmxext because MC_put1_16
 * takes no cpu argument. */
473 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
474 int stride, int height)
476 MC_put1_16 (height, dest, ref, stride);
/* MC_put_8_3dnow: takes (dest, ref, stride, height); forwards to MC_put1_8.
 * The call is identical to the one in MC_put_8_mmxext because MC_put1_8
 * takes no cpu argument. */
479 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
480 int stride, int height)
482 MC_put1_8 (height, dest, ref, stride);
/* MC_avg_x16_3dnow: forwards to MC_avg2_16 with offset = 1 (the neighbouring
 * element) and cpu = CPU_3DNOW. */
485 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
486 int stride, int height)
488 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_x8_3dnow: forwards to MC_avg2_8 with offset = 1 (the neighbouring
 * element) and cpu = CPU_3DNOW. */
491 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
492 int stride, int height)
494 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x16_3dnow: forwards to MC_put2_16 with offset = 1 (the neighbouring
 * element) and cpu = CPU_3DNOW. */
497 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
498 int stride, int height)
500 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_put_x8_3dnow: forwards to MC_put2_8 with offset = 1 (the neighbouring
 * element) and cpu = CPU_3DNOW. */
503 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
504 int stride, int height)
506 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* MC_avg_y16_3dnow: forwards to MC_avg2_16 with offset = stride and
 * cpu = CPU_3DNOW. */
509 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
510 int stride, int height)
512 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_avg_y8_3dnow: forwards to MC_avg2_8 with offset = stride and
 * cpu = CPU_3DNOW. */
515 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
516 int stride, int height)
518 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y16_3dnow: forwards to MC_put2_16 with offset = stride and
 * cpu = CPU_3DNOW. */
521 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
522 int stride, int height)
524 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* MC_put_y8_3dnow: forwards to MC_put2_8 with offset = stride and
 * cpu = CPU_3DNOW. */
527 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
528 int stride, int height)
530 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
533 /*****************************************************************************
534 * Functions exported as capabilities. They are declared as static so that
535 * we don't pollute the namespace too much.
536 *****************************************************************************/
/* motion_getfunctions: copies this module's static table of motion routines
 * into p_function_list->functions.motion.ppppf_motion.
 * The table is declared [2][2][4]. Going by the initializer comments, the
 * copying (MC_put_*) functions come first and the averaging (MC_avg_*)
 * functions second; each visible row lists the plain, x, y and xy variants of
 * the *16* or *8* family. The rows visible here name only *_mmxext routines. */
537 static void motion_getfunctions( function_list_t * p_function_list )
539 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
543 /* Copying functions */
546 MC_put_16_mmxext, MC_put_x16_mmxext, MC_put_y16_mmxext, MC_put_xy16_mmxext
550 MC_put_8_mmxext, MC_put_x8_mmxext, MC_put_y8_mmxext, MC_put_xy8_mmxext
554 /* Averaging functions */
557 MC_avg_16_mmxext, MC_avg_x16_mmxext, MC_avg_y16_mmxext, MC_avg_xy16_mmxext
561 MC_avg_8_mmxext, MC_avg_x8_mmxext, MC_avg_y8_mmxext, MC_avg_xy8_mmxext
/* `list` abbreviates the destination structure used by the copy below. */
566 #define list p_function_list->functions.motion
/* 16 == 2 * 2 * 4, the element count of ppppf_motion.
 * NOTE(review): sizeof( ppppf_motion ) would avoid the hard-coded count and
 * the assumption that a function pointer has the size of void * -- confirm
 * the destination array has the same type before changing it.
 * NOTE(review): memcpy is declared in <string.h>; only <stdlib.h> is visible
 * among the includes in this view -- confirm it is included. */
567 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );