git.sesse.net Git - vlc/blob - plugins/motion/motionmmx.c

   1 /*****************************************************************************
   2  * motionmmx.c : MMX motion compensation module for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2001 VideoLAN
   5  * $Id: motionmmx.c,v 1.17.2.1 2002/06/02 23:17:44 sam Exp $
   6  *
   7  * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
   8  *          Michel Lespinasse <walken@zoy.org>
   9  *          Vladimir Chernyshov <greengrass@writeme.com>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 /*****************************************************************************
  27  * Preamble
  28  *****************************************************************************/
  29 #include <stdlib.h>                                      /* malloc(), free() */
  30 #include <string.h>
  31
  32 #include <videolan/vlc.h>
  33
  34 #include "mmx.h"
  35
  36 /*****************************************************************************
  37  * Local and extern prototypes.
  38  *****************************************************************************/
  39 static void motion_getfunctions( function_list_t * p_function_list );
  40
  41 /*****************************************************************************
  42  * Build configuration tree.
  43  *****************************************************************************/
  44 MODULE_CONFIG_START
  45 MODULE_CONFIG_STOP
  46
  47 MODULE_INIT_START
  48     SET_DESCRIPTION( _("MMX motion compensation module") )
  49     ADD_CAPABILITY( MOTION, 150 )
  50     ADD_REQUIREMENT( MMX )
  51     ADD_SHORTCUT( "mmx" )
  52     ADD_SHORTCUT( "motionmmx" )
  53 MODULE_INIT_STOP
  54
  55 MODULE_ACTIVATE_START
  56     motion_getfunctions( &p_module->p_functions->motion );
  57 MODULE_ACTIVATE_STOP
  58
  59 MODULE_DEACTIVATE_START
  60 MODULE_DEACTIVATE_STOP
  61
  62 /*****************************************************************************
  63  * Motion compensation in MMX
  64  *****************************************************************************/
  65
  66 // some rounding constants
  67 mmx_t round1 = {0x0001000100010001LL};
  68 mmx_t round4 = {0x0002000200020002LL};
  69
  70 /*
  71  * This code should probably be compiled with loop unrolling
  72  * (ie, -funroll-loops in gcc)becuase some of the loops
  73  * use a small static number of iterations. This was written
  74  * with the assumption the compiler knows best about when
  75  * unrolling will help
  76  */
  77
  78 static inline void mmx_zero_reg ()
  79 {
  80     // load 0 into mm0
  81     pxor_r2r (mm0, mm0);
  82 }
  83
  84 static inline void mmx_average_2_U8 (yuv_data_t * dest,
  85                                      yuv_data_t * src1, yuv_data_t * src2)
  86 {
  87     //
  88     // *dest = (*src1 + *src2 + 1)/ 2;
  89     //
  90     static mmx_t mask1 = {0x0101010101010101LL};
  91     static mmx_t mask7f = {0x7f7f7f7f7f7f7f7fLL};
  92
  93     movq_m2r (*src1, mm1);        // load 8 src1 bytes
  94     movq_r2r (mm1, mm2);
  95     psrlq_i2r (1, mm1);
  96     pand_m2r (mask7f, mm1);
  97
  98     movq_m2r (*src2, mm3);        // load 8 src2 bytes
  99     por_r2r (mm3, mm2);
 100     psrlq_i2r (1, mm3);
 101     pand_m2r (mask7f, mm3);
 102
 103     paddb_r2r (mm1, mm3);
 104     pand_m2r (mask1, mm2);
 105     paddb_r2r (mm3, mm2);
 106     movq_r2m (mm2, *dest);        // store result in dest
 107 }
 108
 109 static inline void mmx_interp_average_2_U8 (yuv_data_t * dest,
 110                                             yuv_data_t * src1, yuv_data_t * src2)
 111 {
 112     //
 113     // *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2;
 114     //
 115
 116     movq_m2r (*dest, mm1);        // load 8 dest bytes
 117     movq_r2r (mm1, mm2);        // copy 8 dest bytes
 118
 119     movq_m2r (*src1, mm3);        // load 8 src1 bytes
 120     movq_r2r (mm3, mm4);        // copy 8 src1 bytes
 121
 122     movq_m2r (*src2, mm5);        // load 8 src2 bytes
 123     movq_r2r (mm5, mm6);        // copy 8 src2 bytes
 124
 125     punpcklbw_r2r (mm0, mm1);        // unpack low dest bytes
 126     punpckhbw_r2r (mm0, mm2);        // unpack high dest bytes
 127
 128     punpcklbw_r2r (mm0, mm3);        // unpack low src1 bytes
 129     punpckhbw_r2r (mm0, mm4);        // unpack high src1 bytes
 130
 131     punpcklbw_r2r (mm0, mm5);        // unpack low src2 bytes
 132     punpckhbw_r2r (mm0, mm6);        // unpack high src2 bytes
 133
 134     paddw_r2r (mm5, mm3);        // add lows
 135     paddw_m2r (round1, mm3);
 136     psraw_i2r (1, mm3);                // /2
 137
 138     paddw_r2r (mm6, mm4);        // add highs
 139     paddw_m2r (round1, mm4);
 140     psraw_i2r (1, mm4);                // /2
 141
 142     paddw_r2r (mm3, mm1);        // add lows
 143     paddw_m2r (round1, mm1);
 144     psraw_i2r (1, mm1);                // /2
 145
 146     paddw_r2r (mm4, mm2);        // add highs
 147     paddw_m2r (round1, mm2);
 148     psraw_i2r (1, mm2);                // /2
 149
 150     packuswb_r2r (mm2, mm1);        // pack (w/ saturation)
 151     movq_r2m (mm1, *dest);        // store result in dest
 152 }
 153
 154 static inline void mmx_average_4_U8 (yuv_data_t * dest,
 155                                      yuv_data_t * src1, yuv_data_t * src2,
 156                                      yuv_data_t * src3, yuv_data_t * src4)
 157 {
 158     //
 159     // *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4;
 160     //
 161
 162     movq_m2r (*src1, mm1);        // load 8 src1 bytes
 163     movq_r2r (mm1, mm2);        // copy 8 src1 bytes
 164
 165     punpcklbw_r2r (mm0, mm1);        // unpack low src1 bytes
 166     punpckhbw_r2r (mm0, mm2);        // unpack high src1 bytes
 167
 168     movq_m2r (*src2, mm3);        // load 8 src2 bytes
 169     movq_r2r (mm3, mm4);        // copy 8 src2 bytes
 170
 171     punpcklbw_r2r (mm0, mm3);        // unpack low src2 bytes
 172     punpckhbw_r2r (mm0, mm4);        // unpack high src2 bytes
 173
 174     paddw_r2r (mm3, mm1);        // add lows
 175     paddw_r2r (mm4, mm2);        // add highs
 176
 177     // now have partials in mm1 and mm2
 178
 179     movq_m2r (*src3, mm3);        // load 8 src3 bytes
 180     movq_r2r (mm3, mm4);        // copy 8 src3 bytes
 181
 182     punpcklbw_r2r (mm0, mm3);        // unpack low src3 bytes
 183     punpckhbw_r2r (mm0, mm4);        // unpack high src3 bytes
 184
 185     paddw_r2r (mm3, mm1);        // add lows
 186     paddw_r2r (mm4, mm2);        // add highs
 187
 188     movq_m2r (*src4, mm5);        // load 8 src4 bytes
 189     movq_r2r (mm5, mm6);        // copy 8 src4 bytes
 190
 191     punpcklbw_r2r (mm0, mm5);        // unpack low src4 bytes
 192     punpckhbw_r2r (mm0, mm6);        // unpack high src4 bytes
 193
 194     paddw_r2r (mm5, mm1);        // add lows
 195     paddw_r2r (mm6, mm2);        // add highs
 196
 197     // now have subtotal in mm1 and mm2
 198
 199     paddw_m2r (round4, mm1);
 200     psraw_i2r (2, mm1);                // /4
 201     paddw_m2r (round4, mm2);
 202     psraw_i2r (2, mm2);                // /4
 203
 204     packuswb_r2r (mm2, mm1);        // pack (w/ saturation)
 205     movq_r2m (mm1, *dest);        // store result in dest
 206 }
 207
 208 static inline void mmx_interp_average_4_U8 (yuv_data_t * dest,
 209                                             yuv_data_t * src1, yuv_data_t * src2,
 210                                             yuv_data_t * src3, yuv_data_t * src4)
 211 {
 212     //
 213     // *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2;
 214     //
 215
 216     movq_m2r (*src1, mm1);        // load 8 src1 bytes
 217     movq_r2r (mm1, mm2);        // copy 8 src1 bytes
 218
 219     punpcklbw_r2r (mm0, mm1);        // unpack low src1 bytes
 220     punpckhbw_r2r (mm0, mm2);        // unpack high src1 bytes
 221
 222     movq_m2r (*src2, mm3);        // load 8 src2 bytes
 223     movq_r2r (mm3, mm4);        // copy 8 src2 bytes
 224
 225     punpcklbw_r2r (mm0, mm3);        // unpack low src2 bytes
 226     punpckhbw_r2r (mm0, mm4);        // unpack high src2 bytes
 227
 228     paddw_r2r (mm3, mm1);        // add lows
 229     paddw_r2r (mm4, mm2);        // add highs
 230
 231     // now have partials in mm1 and mm2
 232
 233     movq_m2r (*src3, mm3);        // load 8 src3 bytes
 234     movq_r2r (mm3, mm4);        // copy 8 src3 bytes
 235
 236     punpcklbw_r2r (mm0, mm3);        // unpack low src3 bytes
 237     punpckhbw_r2r (mm0, mm4);        // unpack high src3 bytes
 238
 239     paddw_r2r (mm3, mm1);        // add lows
 240     paddw_r2r (mm4, mm2);        // add highs
 241
 242     movq_m2r (*src4, mm5);        // load 8 src4 bytes
 243     movq_r2r (mm5, mm6);        // copy 8 src4 bytes
 244
 245     punpcklbw_r2r (mm0, mm5);        // unpack low src4 bytes
 246     punpckhbw_r2r (mm0, mm6);        // unpack high src4 bytes
 247
 248     paddw_r2r (mm5, mm1);        // add lows
 249     paddw_r2r (mm6, mm2);        // add highs
 250
 251     paddw_m2r (round4, mm1);
 252     psraw_i2r (2, mm1);                // /4
 253     paddw_m2r (round4, mm2);
 254     psraw_i2r (2, mm2);                // /4
 255
 256     // now have subtotal/4 in mm1 and mm2
 257
 258     movq_m2r (*dest, mm3);        // load 8 dest bytes
 259     movq_r2r (mm3, mm4);        // copy 8 dest bytes
 260
 261     punpcklbw_r2r (mm0, mm3);        // unpack low dest bytes
 262     punpckhbw_r2r (mm0, mm4);        // unpack high dest bytes
 263
 264     paddw_r2r (mm3, mm1);        // add lows
 265     paddw_r2r (mm4, mm2);        // add highs
 266
 267     paddw_m2r (round1, mm1);
 268     psraw_i2r (1, mm1);                // /2
 269     paddw_m2r (round1, mm2);
 270     psraw_i2r (1, mm2);                // /2
 271
 272     // now have end value in mm1 and mm2
 273
 274     packuswb_r2r (mm2, mm1);        // pack (w/ saturation)
 275     movq_r2m (mm1,*dest);        // store result in dest
 276 }
 277
 278 //-----------------------------------------------------------------------
 279
 280 static inline void MC_avg_mmx (int width, int height,
 281                                yuv_data_t * dest, yuv_data_t * ref, int stride)
 282 {
 283     mmx_zero_reg ();
 284
 285     do {
 286         mmx_average_2_U8 (dest, dest, ref);
 287
 288         if (width == 16)
 289             mmx_average_2_U8 (dest+8, dest+8, ref+8);
 290
 291         dest += stride;
 292         ref += stride;
 293     } while (--height);
 294 }
 295
 296 static void MC_avg_16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 297                            int stride, int height)
 298 {
 299     MC_avg_mmx (16, height, dest, ref, stride);
 300 }
 301
 302 static void MC_avg_8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 303                           int stride, int height)
 304 {
 305     MC_avg_mmx (8, height, dest, ref, stride);
 306 }
 307
 308 //-----------------------------------------------------------------------
 309
 310 static inline void MC_put_mmx (int width, int height,
 311                                yuv_data_t * dest, yuv_data_t * ref, int stride)
 312 {
 313     mmx_zero_reg ();
 314
 315     do {
 316         movq_m2r (* ref, mm1);        // load 8 ref bytes
 317         movq_r2m (mm1,* dest);        // store 8 bytes at curr
 318
 319         if (width == 16)
 320             {
 321                 movq_m2r (* (ref+8), mm1);        // load 8 ref bytes
 322                 movq_r2m (mm1,* (dest+8));        // store 8 bytes at curr
 323             }
 324
 325         dest += stride;
 326         ref += stride;
 327     } while (--height);
 328 }
 329
 330 static void MC_put_16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 331                            int stride, int height)
 332 {
 333     MC_put_mmx (16, height, dest, ref, stride);
 334 }
 335
 336 static void MC_put_8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 337                           int stride, int height)
 338 {
 339     MC_put_mmx (8, height, dest, ref, stride);
 340 }
 341
 342 //-----------------------------------------------------------------------
 343
 344 // Half pixel interpolation in the x direction
 345 static inline void MC_avg_x_mmx (int width, int height,
 346                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
 347 {
 348     mmx_zero_reg ();
 349
 350     do {
 351         mmx_interp_average_2_U8 (dest, ref, ref+1);
 352
 353         if (width == 16)
 354             mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
 355
 356         dest += stride;
 357         ref += stride;
 358     } while (--height);
 359 }
 360
 361 static void MC_avg_x16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 362                             int stride, int height)
 363 {
 364     MC_avg_x_mmx (16, height, dest, ref, stride);
 365 }
 366
 367 static void MC_avg_x8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 368                            int stride, int height)
 369 {
 370     MC_avg_x_mmx (8, height, dest, ref, stride);
 371 }
 372
 373 //-----------------------------------------------------------------------
 374
 375 static inline void MC_put_x_mmx (int width, int height,
 376                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
 377 {
 378     mmx_zero_reg ();
 379
 380     do {
 381         mmx_average_2_U8 (dest, ref, ref+1);
 382
 383         if (width == 16)
 384             mmx_average_2_U8 (dest+8, ref+8, ref+9);
 385
 386         dest += stride;
 387         ref += stride;
 388     } while (--height);
 389 }
 390
 391 static void MC_put_x16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 392                             int stride, int height)
 393 {
 394     MC_put_x_mmx (16, height, dest, ref, stride);
 395 }
 396
 397 static void MC_put_x8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 398                            int stride, int height)
 399 {
 400     MC_put_x_mmx (8, height, dest, ref, stride);
 401 }
 402
 403 //-----------------------------------------------------------------------
 404
 405 static inline void MC_avg_xy_8wide_mmx (int height, yuv_data_t * dest,
 406     yuv_data_t * ref, int stride)
 407 {
 408     pxor_r2r (mm0, mm0);
 409     movq_m2r (round4, mm7);
 410
 411     movq_m2r (*ref, mm1);      // calculate first row ref[0] + ref[1]
 412     movq_r2r (mm1, mm2);
 413
 414     punpcklbw_r2r (mm0, mm1);
 415     punpckhbw_r2r (mm0, mm2);
 416
 417     movq_m2r (*(ref+1), mm3);
 418     movq_r2r (mm3, mm4);
 419
 420     punpcklbw_r2r (mm0, mm3);
 421     punpckhbw_r2r (mm0, mm4);
 422
 423     paddw_r2r (mm3, mm1);
 424     paddw_r2r (mm4, mm2);
 425
 426     ref += stride;
 427
 428     do {
 429
 430         movq_m2r (*ref, mm5);   // calculate next row ref[0] + ref[1]
 431         movq_r2r (mm5, mm6);
 432
 433         punpcklbw_r2r (mm0, mm5);
 434         punpckhbw_r2r (mm0, mm6);
 435
 436         movq_m2r (*(ref+1), mm3);
 437         movq_r2r (mm3, mm4);
 438
 439         punpcklbw_r2r (mm0, mm3);
 440         punpckhbw_r2r (mm0, mm4);
 441
 442         paddw_r2r (mm3, mm5);
 443         paddw_r2r (mm4, mm6);
 444
 445         movq_r2r (mm7, mm3);   // calculate round4 + previous row + current row
 446         movq_r2r (mm7, mm4);
 447
 448         paddw_r2r (mm1, mm3);
 449         paddw_r2r (mm2, mm4);
 450
 451         paddw_r2r (mm5, mm3);
 452         paddw_r2r (mm6, mm4);
 453
 454         psraw_i2r (2, mm3);                // /4
 455         psraw_i2r (2, mm4);                // /4
 456
 457         movq_m2r (*dest, mm1);   // calculate (subtotal + dest[0] + round1) / 2
 458         movq_r2r (mm1, mm2);
 459
 460         punpcklbw_r2r (mm0, mm1);
 461         punpckhbw_r2r (mm0, mm2);
 462
 463         paddw_r2r (mm1, mm3);
 464         paddw_r2r (mm2, mm4);
 465
 466         paddw_m2r (round1, mm3);
 467         paddw_m2r (round1, mm4);
 468
 469         psraw_i2r (1, mm3);                // /2
 470         psraw_i2r (1, mm4);                // /2
 471
 472         packuswb_r2r (mm4, mm3);      // pack (w/ saturation)
 473         movq_r2m (mm3, *dest);        // store result in dest
 474
 475         movq_r2r (mm5, mm1);    // remember current row for the next pass
 476         movq_r2r (mm6, mm2);
 477
 478         ref += stride;
 479         dest += stride;
 480
 481     } while (--height);
 482 }
 483
 484 static void MC_avg_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 485                              int stride, int height)
 486 {
 487     MC_avg_xy_8wide_mmx(height, dest, ref, stride);
 488     MC_avg_xy_8wide_mmx(height, dest+8, ref+8, stride);
 489 }
 490
 491 static void MC_avg_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 492                             int stride, int height)
 493 {
 494     MC_avg_xy_8wide_mmx(height, dest, ref, stride);
 495 }
 496
 497 //-----------------------------------------------------------------------
 498
 499 static inline void MC_put_xy_8wide_mmx (int height, yuv_data_t * dest,
 500     yuv_data_t * ref, int stride)
 501 {
 502     pxor_r2r (mm0, mm0);
 503     movq_m2r (round4, mm7);
 504
 505     movq_m2r (*ref, mm1);      // calculate first row ref[0] + ref[1]
 506     movq_r2r (mm1, mm2);
 507
 508     punpcklbw_r2r (mm0, mm1);
 509     punpckhbw_r2r (mm0, mm2);
 510
 511     movq_m2r (*(ref+1), mm3);
 512     movq_r2r (mm3, mm4);
 513
 514     punpcklbw_r2r (mm0, mm3);
 515     punpckhbw_r2r (mm0, mm4);
 516
 517     paddw_r2r (mm3, mm1);
 518     paddw_r2r (mm4, mm2);
 519
 520     ref += stride;
 521
 522     do {
 523
 524         movq_m2r (*ref, mm5);   // calculate next row ref[0] + ref[1]
 525         movq_r2r (mm5, mm6);
 526
 527         punpcklbw_r2r (mm0, mm5);
 528         punpckhbw_r2r (mm0, mm6);
 529
 530         movq_m2r (*(ref+1), mm3);
 531         movq_r2r (mm3, mm4);
 532
 533         punpcklbw_r2r (mm0, mm3);
 534         punpckhbw_r2r (mm0, mm4);
 535
 536         paddw_r2r (mm3, mm5);
 537         paddw_r2r (mm4, mm6);
 538
 539         movq_r2r (mm7, mm3);   // calculate round4 + previous row + current row
 540         movq_r2r (mm7, mm4);
 541
 542         paddw_r2r (mm1, mm3);
 543         paddw_r2r (mm2, mm4);
 544
 545         paddw_r2r (mm5, mm3);
 546         paddw_r2r (mm6, mm4);
 547
 548         psraw_i2r (2, mm3);                // /4
 549         psraw_i2r (2, mm4);                // /4
 550
 551         packuswb_r2r (mm4, mm3);      // pack (w/ saturation)
 552         movq_r2m (mm3, *dest);        // store result in dest
 553
 554         movq_r2r (mm5, mm1);    // advance to the next row
 555         movq_r2r (mm6, mm2);
 556
 557         ref += stride;
 558         dest += stride;
 559
 560     } while (--height);
 561 }
 562
 563 static void MC_put_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 564                              int stride, int height)
 565 {
 566     MC_put_xy_8wide_mmx(height, dest, ref, stride);
 567     MC_put_xy_8wide_mmx(height, dest + 8, ref + 8, stride);
 568 }
 569
 570 static void MC_put_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 571                             int stride, int height)
 572 {
 573     MC_put_xy_8wide_mmx(height, dest, ref, stride);
 574 }
 575
 576 //-----------------------------------------------------------------------
 577
 578 static inline void MC_avg_y_mmx (int width, int height,
 579                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
 580 {
 581     yuv_data_t * ref_next = ref+stride;
 582
 583     mmx_zero_reg ();
 584
 585     do {
 586         mmx_interp_average_2_U8 (dest, ref, ref_next);
 587
 588         if (width == 16)
 589             mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
 590
 591         dest += stride;
 592         ref += stride;
 593         ref_next += stride;
 594     } while (--height);
 595 }
 596
 597 static void MC_avg_y16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 598                             int stride, int height)
 599 {
 600     MC_avg_y_mmx (16, height, dest, ref, stride);
 601 }
 602
 603 static void MC_avg_y8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 604                            int stride, int height)
 605 {
 606     MC_avg_y_mmx (8, height, dest, ref, stride);
 607 }
 608
 609 //-----------------------------------------------------------------------
 610
 611 static inline void MC_put_y_mmx (int width, int height,
 612                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
 613 {
 614     yuv_data_t * ref_next = ref+stride;
 615
 616     mmx_zero_reg ();
 617
 618     do {
 619         mmx_average_2_U8 (dest, ref, ref_next);
 620
 621         if (width == 16)
 622             mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
 623
 624         dest += stride;
 625         ref += stride;
 626         ref_next += stride;
 627     } while (--height);
 628 }
 629
 630 static void MC_put_y16_mmx (yuv_data_t * dest, yuv_data_t * ref,
 631                             int stride, int height)
 632 {
 633     MC_put_y_mmx (16, height, dest, ref, stride);
 634 }
 635
 636 static void MC_put_y8_mmx (yuv_data_t * dest, yuv_data_t * ref,
 637                            int stride, int height)
 638 {
 639     MC_put_y_mmx (8, height, dest, ref, stride);
 640 }
 641
 642
 643 /*****************************************************************************
 644  * Functions exported as capabilities. They are declared as static so that
 645  * we don't pollute the namespace too much.
 646  *****************************************************************************/
 647 static void motion_getfunctions( function_list_t * p_function_list )
 648 {
 649     static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
 650                                            int, int ) =
 651     {
 652         {
 653             /* Copying functions */
 654             {
 655                 /* Width == 16 */
 656                 MC_put_16_mmx, MC_put_x16_mmx, MC_put_y16_mmx, MC_put_xy16_mmx
 657             },
 658             {
 659                 /* Width == 8 */
 660                 MC_put_8_mmx,  MC_put_x8_mmx,  MC_put_y8_mmx, MC_put_xy8_mmx
 661             }
 662         },
 663         {
 664             /* Averaging functions */
 665             {
 666                 /* Width == 16 */
 667                 MC_avg_16_mmx, MC_avg_x16_mmx, MC_avg_y16_mmx, MC_avg_xy16_mmx
 668             },
 669             {
 670                 /* Width == 8 */
 671                 MC_avg_8_mmx,  MC_avg_x8_mmx,  MC_avg_y8_mmx,  MC_avg_xy8_mmx
 672             }
 673         }
 674     };
 675
 676 #define list p_function_list->functions.motion
 677     memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );
 678 #undef list
 679
 680     return;
 681 }
 682