1 /*****************************************************************************
2 * motion3dnow.c : 3D Now! motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motion3dnow.c,v 1.12 2002/07/31 20:56:52 sam Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Michel Lespinasse <walken@zoy.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <stdlib.h> /* malloc(), free() */
35 /*****************************************************************************
37 *****************************************************************************/
38 static int Open ( vlc_object_t * );
40 /*****************************************************************************
42 *****************************************************************************/
/* Module registration entries: a translatable description string, the
 * "motion compensation" capability registered with the value 150, a 3DNOW
 * requirement, the "3dn" and "3dnow" shortcuts, and Open as the first
 * callback (the second callback slot is NULL).
 * NOTE(review): the enclosing module begin/end markers are not visible in
 * this listing -- confirm against the full file. */
44 set_description( _("3D Now! motion compensation module") );
45 set_capability( "motion compensation", 150 );
46 add_requirement( 3DNOW );
47 add_shortcut( "3dn" );
48 add_shortcut( "3dnow" );
49 set_callbacks( Open, NULL );
52 /*****************************************************************************
53 * Motion compensation in 3D Now! (this file also handles MMXEXT, which is admittedly ugly)
54 *****************************************************************************/
60 //CPU_MMXEXT/CPU_3DNOW adaptation layer
/* pavg_r2r(src, dest): picks the averaging instruction by CPU flavour.
 * When cpu == CPU_MMXEXT it expands to pavgb_r2r (src, dest); the other
 * visible arm is pavgusb_r2r (src, dest). Both arguments are forwarded
 * unchanged. The expansion reads a variable named `cpu`, so one must be in
 * scope wherever the macro is used.
 * NOTE(review): the `else` line and any wrapper around the statements are
 * not visible in this listing -- confirm in the full file. */
62 #define pavg_r2r(src,dest) \
64 if (cpu == CPU_MMXEXT) \
65 pavgb_r2r (src, dest); \
67 pavgusb_r2r (src, dest); \
/* pavg_m2r(src, dest): same CPU dispatch as pavg_r2r but through the *_m2r
 * macros. When cpu == CPU_MMXEXT it expands to pavgb_m2r (src, dest); the
 * other visible arm is pavgusb_m2r (src, dest). Call sites in this file pass
 * a dereferenced pointer as src and an mmN register name as dest. The
 * expansion reads a variable named `cpu` from the enclosing scope.
 * NOTE(review): the `else` line and any wrapper around the statements are
 * not visible in this listing -- confirm in the full file. */
70 #define pavg_m2r(src,dest) \
72 if (cpu == CPU_MMXEXT) \
73 pavgb_m2r (src, dest); \
75 pavgusb_m2r (src, dest); \
/* MC_put1_8: the visible statement writes mm0 out to *dest.
 * The MC_put_8_* wrappers call it as (height, dest, ref, stride).
 * NOTE(review): the rest of the signature, the load that fills mm0 and any
 * per-row loop are not visible in this listing -- confirm in the full file. */
82 static inline void MC_put1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
87 movq_r2m (mm0, *dest);
/* MC_put1_16: the visible statements load *(ref+8) into mm1, then write mm0
 * to *dest and mm1 to *(dest+8).
 * The MC_put_16_* wrappers call it as (height, dest, ref, stride).
 * NOTE(review): the rest of the signature, the load that fills mm0 and any
 * per-row loop are not visible in this listing -- confirm in the full file. */
93 static inline void MC_put1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
98 movq_m2r (*(ref+8), mm1);
100 movq_r2m (mm0, *dest);
101 movq_r2m (mm1, *(dest+8));
/* MC_avg1_8: loads *ref into mm0, combines it with the current *dest through
 * pavg_m2r (pavgb or pavgusb depending on cpu), and writes mm0 back to *dest.
 * The MC_avg_8_* wrappers call it as (height, dest, ref, stride, CPU_*).
 * NOTE(review): the rest of the signature and any per-row loop are not
 * visible in this listing -- confirm in the full file. */
106 static inline void MC_avg1_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
110 movq_m2r (*ref, mm0);
111 pavg_m2r (*dest, mm0);
113 movq_r2m (mm0, *dest);
/* MC_avg1_16: loads *ref and *(ref+8) into mm0/mm1, combines them with *dest
 * and *(dest+8) through pavg_m2r (pavgb or pavgusb depending on cpu), and
 * writes mm0/mm1 back to *dest and *(dest+8).
 * The MC_avg_16_* wrappers call it as (height, dest, ref, stride, CPU_*).
 * NOTE(review): the rest of the signature and any per-row loop are not
 * visible in this listing -- confirm in the full file. */
118 static inline void MC_avg1_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
122 movq_m2r (*ref, mm0);
123 movq_m2r (*(ref+8), mm1);
124 pavg_m2r (*dest, mm0);
125 pavg_m2r (*(dest+8), mm1);
126 movq_r2m (mm0, *dest);
128 movq_r2m (mm1, *(dest+8));
/* MC_put2_8: loads *ref into mm0, combines it with *(ref+offset) through
 * pavg_m2r (pavgb or pavgusb depending on cpu), and writes mm0 to *dest.
 * The wrappers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants).
 * NOTE(review): braces and any per-row loop using height/stride are not
 * visible in this listing -- confirm in the full file. */
133 static inline void MC_put2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
134 int stride, int offset, int cpu)
137 movq_m2r (*ref, mm0);
138 pavg_m2r (*(ref+offset), mm0);
140 movq_r2m (mm0, *dest);
/* MC_put2_16: loads *ref and *(ref+8) into mm0/mm1, combines them with
 * *(ref+offset) and *(ref+offset+8) through pavg_m2r (pavgb or pavgusb
 * depending on cpu), and writes mm0/mm1 to *dest and *(dest+8).
 * The wrappers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants).
 * NOTE(review): braces and any per-row loop using height/stride are not
 * visible in this listing -- confirm in the full file. */
145 static inline void MC_put2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
146 int stride, int offset, int cpu)
149 movq_m2r (*ref, mm0);
150 movq_m2r (*(ref+8), mm1);
151 pavg_m2r (*(ref+offset), mm0);
152 pavg_m2r (*(ref+offset+8), mm1);
153 movq_r2m (mm0, *dest);
155 movq_r2m (mm1, *(dest+8));
/* MC_avg2_8: loads *ref into mm0, combines it first with *(ref+offset) and
 * then with the current *dest (both through pavg_m2r, i.e. pavgb or pavgusb
 * depending on cpu), and writes mm0 back to *dest.
 * The wrappers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants).
 * NOTE(review): braces and any per-row loop using height/stride are not
 * visible in this listing -- confirm in the full file. */
160 static inline void MC_avg2_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
161 int stride, int offset, int cpu)
164 movq_m2r (*ref, mm0);
165 pavg_m2r (*(ref+offset), mm0);
166 pavg_m2r (*dest, mm0);
168 movq_r2m (mm0, *dest);
/* MC_avg2_16: loads *ref and *(ref+8) into mm0/mm1, combines them first with
 * *(ref+offset) and *(ref+offset+8) and then with *dest and *(dest+8) (all
 * through pavg_m2r, i.e. pavgb or pavgusb depending on cpu), and writes
 * mm0/mm1 back to *dest and *(dest+8).
 * The wrappers in this file pass offset = 1 (x variants) or offset = stride
 * (y variants).
 * NOTE(review): braces and any per-row loop using height/stride are not
 * visible in this listing -- confirm in the full file. */
173 static inline void MC_avg2_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
174 int stride, int offset, int cpu)
177 movq_m2r (*ref, mm0);
178 movq_m2r (*(ref+8), mm1);
179 pavg_m2r (*(ref+offset), mm0);
180 pavg_m2r (*(ref+offset+8), mm1);
181 pavg_m2r (*dest, mm0);
182 pavg_m2r (*(dest+8), mm1);
184 movq_r2m (mm0, *dest);
185 movq_r2m (mm1, *(dest+8));
/* Constant with 0x01 in each of its eight bytes. The 4-point routines in
 * this file AND it into mm7 (pand_m2r) just before subtracting mm7 from mm0
 * with psubusb_r2r. */
190 static mmx_t mask_one = {0x0101010101010101LL};
/* MC_put4_8: 4-point put routine, called by the MC_put_xy8_* wrappers as
 * (height, dest, ref, stride, CPU_*).
 * Visible steps: *ref and *(ref+1) are loaded into mm0/mm1; later *ref and
 * *(ref+1) are loaded again into mm2/mm3 (presumably after ref has been
 * advanced -- TODO confirm); mm7 is masked with mask_one and subtracted from
 * mm0 with unsigned saturation; mm0 is written to *dest; finally mm6 and mm2
 * are copied into mm7 and mm0 for the next pass (the "unroll" moves).
 * NOTE(review): the rest of the signature, the loop and most of the
 * arithmetic between these steps are not visible in this listing -- confirm
 * in the full file. */
192 static inline void MC_put4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
195 movq_m2r (*ref, mm0);
196 movq_m2r (*(ref+1), mm1);
203 movq_m2r (*ref, mm2);
206 movq_m2r (*(ref+1), mm3);
218 pand_m2r (mask_one, mm7);
220 psubusb_r2r (mm7, mm0);
223 movq_r2m (mm0, *dest);
226 movq_r2r (mm6, mm7); // unroll !
227 movq_r2r (mm2, mm0); // unroll !
/* MC_put4_16: 4-point put routine, called by the MC_put_xy16_* wrappers as
 * (height, dest, ref, stride, CPU_*).
 * Visible steps, first half: *ref, *(ref+stride+1), *(ref+1) and
 * *(ref+stride) are loaded into mm0..mm3; mm7 is masked with mask_one and
 * subtracted from mm0 with unsigned saturation; mm0 is written to *dest.
 * Second half: the same sequence on *(ref+8), *(ref+stride+9), *(ref+9) and
 * *(ref+stride+8), with the result written to *(dest+8).
 * NOTE(review): the rest of the signature, the loop and the arithmetic that
 * combines the four loads are not visible in this listing -- confirm in the
 * full file. */
231 static inline void MC_put4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
235 movq_m2r (*ref, mm0);
236 movq_m2r (*(ref+stride+1), mm1);
238 movq_m2r (*(ref+1), mm2);
240 movq_m2r (*(ref+stride), mm3);
249 pand_m2r (mask_one, mm7);
251 psubusb_r2r (mm7, mm0);
252 movq_r2m (mm0, *dest);
254 movq_m2r (*(ref+8), mm0);
255 movq_m2r (*(ref+stride+9), mm1);
257 movq_m2r (*(ref+9), mm2);
259 movq_m2r (*(ref+stride+8), mm3);
268 pand_m2r (mask_one, mm7);
270 psubusb_r2r (mm7, mm0);
272 movq_r2m (mm0, *(dest+8));
/* MC_avg4_8: 4-point averaging routine, called by the MC_avg_xy8_* wrappers
 * as (height, dest, ref, stride, CPU_*).
 * Visible steps: *ref, *(ref+stride+1), *(ref+1) and *(ref+stride) are
 * loaded into mm0..mm3; mm7 is masked with mask_one and subtracted from mm0
 * with unsigned saturation; the current *dest is loaded into mm1; mm0 is
 * written back to *dest.
 * NOTE(review): the rest of the signature, the loop, the arithmetic that
 * combines the four loads and the step that merges mm1 (old *dest) into mm0
 * are not visible in this listing -- confirm in the full file. */
277 static inline void MC_avg4_8 (int height, yuv_data_t * dest, yuv_data_t * ref,
281 movq_m2r (*ref, mm0);
282 movq_m2r (*(ref+stride+1), mm1);
284 movq_m2r (*(ref+1), mm2);
286 movq_m2r (*(ref+stride), mm3);
295 pand_m2r (mask_one, mm7);
297 psubusb_r2r (mm7, mm0);
298 movq_m2r (*dest, mm1);
301 movq_r2m (mm0, *dest);
/* MC_avg4_16: 4-point averaging routine, called by the MC_avg_xy16_*
 * wrappers as (height, dest, ref, stride, CPU_*).
 * Visible steps, first half: *ref, *(ref+stride+1), *(ref+1) and
 * *(ref+stride) are loaded into mm0..mm3; mm7 is masked with mask_one and
 * subtracted from mm0 with unsigned saturation; the current *dest is loaded
 * into mm1; mm0 is written back to *dest.
 * Second half: the same sequence on *(ref+8), *(ref+stride+9), *(ref+9) and
 * *(ref+stride+8), using *(dest+8) as the destination.
 * NOTE(review): the rest of the signature, the loop, the arithmetic that
 * combines the four loads and the step that merges mm1 (old destination)
 * into mm0 are not visible in this listing -- confirm in the full file. */
306 static inline void MC_avg4_16 (int height, yuv_data_t * dest, yuv_data_t * ref,
310 movq_m2r (*ref, mm0);
311 movq_m2r (*(ref+stride+1), mm1);
313 movq_m2r (*(ref+1), mm2);
315 movq_m2r (*(ref+stride), mm3);
324 pand_m2r (mask_one, mm7);
326 psubusb_r2r (mm7, mm0);
327 movq_m2r (*dest, mm1);
329 movq_r2m (mm0, *dest);
331 movq_m2r (*(ref+8), mm0);
332 movq_m2r (*(ref+stride+9), mm1);
334 movq_m2r (*(ref+9), mm2);
336 movq_m2r (*(ref+stride+8), mm3);
345 pand_m2r (mask_one, mm7);
347 psubusb_r2r (mm7, mm0);
348 movq_m2r (*(dest+8), mm1);
351 movq_r2m (mm0, *(dest+8));
/* MMXEXT entry point: forwards to MC_avg1_16 with height moved to the front
 * and CPU_MMXEXT as the last argument. */
356 static void MC_avg_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
357 int stride, int height)
359 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg1_8 with height moved to the front
 * and CPU_MMXEXT as the last argument. */
362 static void MC_avg_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
363 int stride, int height)
365 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put1_16 with height moved to the
 * front. No CPU selector is passed to this helper. */
368 static void MC_put_16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
369 int stride, int height)
371 MC_put1_16 (height, dest, ref, stride);
/* MMXEXT entry point: forwards to MC_put1_8 with height moved to the
 * front. No CPU selector is passed to this helper. */
374 static void MC_put_8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
375 int stride, int height)
377 MC_put1_8 (height, dest, ref, stride);
/* MMXEXT entry point: forwards to MC_avg2_16 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_MMXEXT. */
380 static void MC_avg_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
381 int stride, int height)
383 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_8 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_MMXEXT. */
386 static void MC_avg_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
387 int stride, int height)
389 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_16 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_MMXEXT. */
392 static void MC_put_x16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
393 int stride, int height)
395 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_8 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_MMXEXT. */
398 static void MC_put_x8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
399 int stride, int height)
401 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_16 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_MMXEXT. */
404 static void MC_avg_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
405 int stride, int height)
407 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_avg2_8 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_MMXEXT. */
410 static void MC_avg_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
411 int stride, int height)
413 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_16 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_MMXEXT. */
416 static void MC_put_y16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
417 int stride, int height)
419 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to MC_put2_8 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_MMXEXT. */
422 static void MC_put_y8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
423 int stride, int height)
425 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the 4-point helper MC_avg4_16 with height
 * moved to the front and CPU_MMXEXT as the last argument. */
428 static void MC_avg_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
429 int stride, int height)
431 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the 4-point helper MC_avg4_8 with height
 * moved to the front and CPU_MMXEXT as the last argument. */
434 static void MC_avg_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
435 int stride, int height)
437 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the 4-point helper MC_put4_16 with height
 * moved to the front and CPU_MMXEXT as the last argument. */
440 static void MC_put_xy16_mmxext (yuv_data_t * dest, yuv_data_t * ref,
441 int stride, int height)
443 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
/* MMXEXT entry point: forwards to the 4-point helper MC_put4_8 with height
 * moved to the front and CPU_MMXEXT as the last argument. */
446 static void MC_put_xy8_mmxext (yuv_data_t * dest, yuv_data_t * ref,
447 int stride, int height)
449 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
/* 3D Now! entry point: forwards to MC_avg1_16 with height moved to the
 * front and CPU_3DNOW as the last argument. */
453 static void MC_avg_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
454 int stride, int height)
456 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_avg1_8 with height moved to the
 * front and CPU_3DNOW as the last argument. */
459 static void MC_avg_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
460 int stride, int height)
462 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_put1_16 with height moved to the
 * front. No CPU selector is passed to this helper. */
465 static void MC_put_16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
466 int stride, int height)
468 MC_put1_16 (height, dest, ref, stride);
/* 3D Now! entry point: forwards to MC_put1_8 with height moved to the
 * front. No CPU selector is passed to this helper. */
471 static void MC_put_8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
472 int stride, int height)
474 MC_put1_8 (height, dest, ref, stride);
/* 3D Now! entry point: forwards to MC_avg2_16 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_3DNOW. */
477 static void MC_avg_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
478 int stride, int height)
480 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_avg2_8 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_3DNOW. */
483 static void MC_avg_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
484 int stride, int height)
486 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_put2_16 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_3DNOW. */
489 static void MC_put_x16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
490 int stride, int height)
492 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_put2_8 with offset = 1 (the helper
 * pairs *ref with *(ref+1)) and cpu = CPU_3DNOW. */
495 static void MC_put_x8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
496 int stride, int height)
498 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_avg2_16 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_3DNOW. */
501 static void MC_avg_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
502 int stride, int height)
504 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_avg2_8 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_3DNOW. */
507 static void MC_avg_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
508 int stride, int height)
510 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_put2_16 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_3DNOW. */
513 static void MC_put_y16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
514 int stride, int height)
516 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to MC_put2_8 with offset = stride (the
 * helper pairs *ref with *(ref+stride)) and cpu = CPU_3DNOW. */
519 static void MC_put_y8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
520 int stride, int height)
522 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the 4-point helper MC_avg4_16 with
 * height moved to the front and CPU_3DNOW as the last argument. */
525 static void MC_avg_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
526 int stride, int height)
528 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the 4-point helper MC_avg4_8 with
 * height moved to the front and CPU_3DNOW as the last argument. */
531 static void MC_avg_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
532 int stride, int height)
534 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the 4-point helper MC_put4_16 with
 * height moved to the front and CPU_3DNOW as the last argument. */
537 static void MC_put_xy16_3dnow (yuv_data_t * dest, yuv_data_t * ref,
538 int stride, int height)
540 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
/* 3D Now! entry point: forwards to the 4-point helper MC_put4_8 with
 * height moved to the front and CPU_3DNOW as the last argument. */
543 static void MC_put_xy8_3dnow (yuv_data_t * dest, yuv_data_t * ref,
544 int stride, int height)
546 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
549 /*****************************************************************************
550 * Functions exported as capabilities. They are declared as static so that
551 * we don't pollute the namespace too much.
552 *****************************************************************************/
/* ppppf_motion[2][2][4]: table of routines taking (dest, ref, stride,
 * height). In the visible rows the copying (put) group comes first and the
 * averaging (avg) group second; within each group the 16 row precedes the 8
 * row, and the four columns are the plain, x, y and xy variants.
 * NOTE(review): only the *_3dnow wrappers appear in the visible rows; the
 * *_mmxext wrappers defined in this file are not referenced here -- confirm
 * whether that is intended. The braces of the initializer are not visible in
 * this listing. */
553 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *, int, int ) =
555 /* Copying functions */
558 { MC_put_16_3dnow, MC_put_x16_3dnow, MC_put_y16_3dnow, MC_put_xy16_3dnow },
560 { MC_put_8_3dnow, MC_put_x8_3dnow, MC_put_y8_3dnow, MC_put_xy8_3dnow }
562 /* Averaging functions */
565 { MC_avg_16_3dnow, MC_avg_x16_3dnow, MC_avg_y16_3dnow, MC_avg_xy16_3dnow },
567 { MC_avg_8_3dnow, MC_avg_x8_3dnow, MC_avg_y8_3dnow, MC_avg_xy8_3dnow }
/* Open: callback registered through set_callbacks. The visible statement
 * publishes the ppppf_motion function table by storing it in
 * p_this->p_private.
 * NOTE(review): the braces and the return statement are not visible in this
 * listing -- confirm the returned value in the full file. */
571 static int Open ( vlc_object_t *p_this )
573 p_this->p_private = ppppf_motion;