/*****************************************************************************
 * motionmmx.c : MMX motion compensation module for vlc
 *****************************************************************************
 * Copyright (C) 2001 VideoLAN
 * $Id: motionmmx.c,v 1.19 2002/06/02 23:29:29 sam Exp $
 *
 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *          Michel Lespinasse <walken@zoy.org>
 *          Vladimir Chernyshov <greengrass@writeme.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
 *****************************************************************************/
/*****************************************************************************
 * Preamble
 *****************************************************************************/
#include <stdlib.h>                                      /* malloc(), free() */
#include <string.h>                                              /* memcpy() */

#include <vlc/vlc.h>

#include "mmx.h"
/*****************************************************************************
 * Local and extern prototypes.
 *****************************************************************************/
static void motion_getfunctions( function_list_t * p_function_list );
/*****************************************************************************
 * Build configuration tree.
 *****************************************************************************/
MODULE_CONFIG_START
MODULE_CONFIG_STOP

MODULE_INIT_START
    SET_DESCRIPTION( _("MMX motion compensation module") )
    ADD_CAPABILITY( MOTION, 150 )
    ADD_REQUIREMENT( MMX )
MODULE_INIT_STOP

MODULE_ACTIVATE_START
    motion_getfunctions( &p_module->p_functions->motion );
MODULE_ACTIVATE_STOP

MODULE_DEACTIVATE_START
MODULE_DEACTIVATE_STOP
/*****************************************************************************
 * Motion compensation in MMX
 *****************************************************************************/

// rounding constants: +1 before a /2, +2 before a /4 give round-to-nearest
mmx_t round1 = {0x0001000100010001LL};
mmx_t round4 = {0x0002000200020002LL};

/*
 * This code should probably be compiled with loop unrolling
 * (i.e. -funroll-loops in gcc) because some of the loops
 * use a small static number of iterations. This was written
 * with the assumption the compiler knows best about when
 * unrolling will help.
 */
static inline void mmx_zero_reg ()
{
    // load 0 into mm0, used as the zero operand by the unpack instructions
    pxor_r2r (mm0, mm0);
}
static inline void mmx_average_2_U8 (yuv_data_t * dest,
                                     yuv_data_t * src1, yuv_data_t * src2)
{
    // *dest = (*src1 + *src2 + 1) / 2;
    static mmx_t mask1 = {0x0101010101010101LL};
    static mmx_t mask7f = {0x7f7f7f7f7f7f7f7fLL};

    movq_m2r (*src1, mm1);        // load 8 src1 bytes
    movq_r2r (mm1, mm2);          // copy them for the rounding term
    psrlq_i2r (1, mm1);           // src1 / 2
    pand_m2r (mask7f, mm1);       // clear bits shifted in across byte lanes

    movq_m2r (*src2, mm3);        // load 8 src2 bytes
    por_r2r (mm3, mm2);           // mm2 = src1 | src2
    psrlq_i2r (1, mm3);           // src2 / 2
    pand_m2r (mask7f, mm3);       // clear bits shifted in across byte lanes

    paddb_r2r (mm1, mm3);         // src1/2 + src2/2
    pand_m2r (mask1, mm2);        // rounding term: (src1 | src2) & 1
    paddb_r2r (mm3, mm2);         // add rounding
    movq_r2m (mm2, *dest);        // store result in dest
}
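
/*
 * A scalar sketch of the trick used above (illustrative only, not part of
 * the original module): averaging without widening to 16 bits relies on
 * the identity (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1).
 */
#if 0
static inline void scalar_average_2_U8 (yuv_data_t * dest,
                                        yuv_data_t * src1, yuv_data_t * src2)
{
    int i;
    for( i = 0; i < 8; i++ )
        dest[i] = (src1[i] >> 1) + (src2[i] >> 1)
                  + ((src1[i] | src2[i]) & 1);
}
#endif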
static inline void mmx_interp_average_2_U8 (yuv_data_t * dest,
                                            yuv_data_t * src1, yuv_data_t * src2)
{
    // *dest = (*dest + (*src1 + *src2 + 1) / 2 + 1) / 2;

    movq_m2r (*dest, mm1);        // load 8 dest bytes
    movq_r2r (mm1, mm2);          // copy 8 dest bytes

    movq_m2r (*src1, mm3);        // load 8 src1 bytes
    movq_r2r (mm3, mm4);          // copy 8 src1 bytes

    movq_m2r (*src2, mm5);        // load 8 src2 bytes
    movq_r2r (mm5, mm6);          // copy 8 src2 bytes

    punpcklbw_r2r (mm0, mm1);     // unpack low dest bytes
    punpckhbw_r2r (mm0, mm2);     // unpack high dest bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low src1 bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high src1 bytes

    punpcklbw_r2r (mm0, mm5);     // unpack low src2 bytes
    punpckhbw_r2r (mm0, mm6);     // unpack high src2 bytes

    paddw_r2r (mm5, mm3);         // add lows
    paddw_m2r (round1, mm3);
    psraw_i2r (1, mm3);           // /2

    paddw_r2r (mm6, mm4);         // add highs
    paddw_m2r (round1, mm4);
    psraw_i2r (1, mm4);           // /2

    paddw_r2r (mm3, mm1);         // add lows
    paddw_m2r (round1, mm1);
    psraw_i2r (1, mm1);           // /2

    paddw_r2r (mm4, mm2);         // add highs
    paddw_m2r (round1, mm2);
    psraw_i2r (1, mm2);           // /2

    packuswb_r2r (mm2, mm1);      // pack (w/ saturation)
    movq_r2m (mm1, *dest);        // store result in dest
}
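
/*
 * Note (illustrative, not in the original source): the two-stage rounding
 * above is deliberate and not equal to a single (*dest + *src1 + *src2 + 2)/4.
 * e.g. dest = 0, src1 = 0, src2 = 1 gives (0 + (0+1+1)/2 + 1)/2 = 1,
 * whereas (0 + 0 + 1 + 2)/4 would give 0.
 */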
static inline void mmx_average_4_U8 (yuv_data_t * dest,
                                     yuv_data_t * src1, yuv_data_t * src2,
                                     yuv_data_t * src3, yuv_data_t * src4)
{
    // *dest = (*src1 + *src2 + *src3 + *src4 + 2) / 4;

    movq_m2r (*src1, mm1);        // load 8 src1 bytes
    movq_r2r (mm1, mm2);          // copy 8 src1 bytes

    punpcklbw_r2r (mm0, mm1);     // unpack low src1 bytes
    punpckhbw_r2r (mm0, mm2);     // unpack high src1 bytes

    movq_m2r (*src2, mm3);        // load 8 src2 bytes
    movq_r2r (mm3, mm4);          // copy 8 src2 bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low src2 bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high src2 bytes

    paddw_r2r (mm3, mm1);         // add lows
    paddw_r2r (mm4, mm2);         // add highs

    // now have partials in mm1 and mm2

    movq_m2r (*src3, mm3);        // load 8 src3 bytes
    movq_r2r (mm3, mm4);          // copy 8 src3 bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low src3 bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high src3 bytes

    paddw_r2r (mm3, mm1);         // add lows
    paddw_r2r (mm4, mm2);         // add highs

    movq_m2r (*src4, mm5);        // load 8 src4 bytes
    movq_r2r (mm5, mm6);          // copy 8 src4 bytes

    punpcklbw_r2r (mm0, mm5);     // unpack low src4 bytes
    punpckhbw_r2r (mm0, mm6);     // unpack high src4 bytes

    paddw_r2r (mm5, mm1);         // add lows
    paddw_r2r (mm6, mm2);         // add highs

    // now have subtotal in mm1 and mm2

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);           // /4
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);           // /4

    packuswb_r2r (mm2, mm1);      // pack (w/ saturation)
    movq_r2m (mm1, *dest);        // store result in dest
}
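
/*
 * Hypothetical scalar reference for the above (illustrative only): the +2
 * is the round4 bias, and the 16-bit lanes easily hold the maximum sum.
 */
#if 0
static inline void scalar_average_4_U8 (yuv_data_t * dest,
                                        yuv_data_t * src1, yuv_data_t * src2,
                                        yuv_data_t * src3, yuv_data_t * src4)
{
    int i;
    for( i = 0; i < 8; i++ )
        dest[i] = (src1[i] + src2[i] + src3[i] + src4[i] + 2) >> 2;
}
#endif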
static inline void mmx_interp_average_4_U8 (yuv_data_t * dest,
                                            yuv_data_t * src1, yuv_data_t * src2,
                                            yuv_data_t * src3, yuv_data_t * src4)
{
    // *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2) / 4 + 1) / 2;

    movq_m2r (*src1, mm1);        // load 8 src1 bytes
    movq_r2r (mm1, mm2);          // copy 8 src1 bytes

    punpcklbw_r2r (mm0, mm1);     // unpack low src1 bytes
    punpckhbw_r2r (mm0, mm2);     // unpack high src1 bytes

    movq_m2r (*src2, mm3);        // load 8 src2 bytes
    movq_r2r (mm3, mm4);          // copy 8 src2 bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low src2 bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high src2 bytes

    paddw_r2r (mm3, mm1);         // add lows
    paddw_r2r (mm4, mm2);         // add highs

    // now have partials in mm1 and mm2

    movq_m2r (*src3, mm3);        // load 8 src3 bytes
    movq_r2r (mm3, mm4);          // copy 8 src3 bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low src3 bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high src3 bytes

    paddw_r2r (mm3, mm1);         // add lows
    paddw_r2r (mm4, mm2);         // add highs

    movq_m2r (*src4, mm5);        // load 8 src4 bytes
    movq_r2r (mm5, mm6);          // copy 8 src4 bytes

    punpcklbw_r2r (mm0, mm5);     // unpack low src4 bytes
    punpckhbw_r2r (mm0, mm6);     // unpack high src4 bytes

    paddw_r2r (mm5, mm1);         // add lows
    paddw_r2r (mm6, mm2);         // add highs

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);           // /4
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);           // /4

    // now have subtotal/4 in mm1 and mm2

    movq_m2r (*dest, mm3);        // load 8 dest bytes
    movq_r2r (mm3, mm4);          // copy 8 dest bytes

    punpcklbw_r2r (mm0, mm3);     // unpack low dest bytes
    punpckhbw_r2r (mm0, mm4);     // unpack high dest bytes

    paddw_r2r (mm3, mm1);         // add lows
    paddw_r2r (mm4, mm2);         // add highs

    paddw_m2r (round1, mm1);
    psraw_i2r (1, mm1);           // /2
    paddw_m2r (round1, mm2);
    psraw_i2r (1, mm2);           // /2

    // now have end value in mm1 and mm2

    packuswb_r2r (mm2, mm1);      // pack (w/ saturation)
    movq_r2m (mm1, *dest);        // store result in dest
}
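
/*
 * Range check (illustrative): the signed 16-bit lanes never overflow.
 * The 4-way sum peaks at 4*255 + 2 = 1022, and after the >>2 the second
 * stage adds at most 255 + 255 + 1 = 511, both well within a 16-bit word.
 */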
//-----------------------------------------------------------------------

static inline void MC_avg_mmx (int width, int height,
                               yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, dest, ref);

        if (width == 16)
            mmx_average_2_U8 (dest+8, dest+8, ref+8);

        dest += stride;
        ref += stride;
    } while (--height);
}

static void MC_avg_16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_avg_mmx (16, height, dest, ref, stride);
}

static void MC_avg_8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                          int stride, int height)
{
    MC_avg_mmx (8, height, dest, ref, stride);
}
//-----------------------------------------------------------------------

static inline void MC_put_mmx (int width, int height,
                               yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    do {
        movq_m2r (*ref, mm1);             // load 8 ref bytes
        movq_r2m (mm1, *dest);            // store 8 bytes at curr

        if (width == 16)
        {
            movq_m2r (*(ref+8), mm1);     // load 8 ref bytes
            movq_r2m (mm1, *(dest+8));    // store 8 bytes at curr
        }

        dest += stride;
        ref += stride;
    } while (--height);
}

static void MC_put_16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_put_mmx (16, height, dest, ref, stride);
}

static void MC_put_8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                          int stride, int height)
{
    MC_put_mmx (8, height, dest, ref, stride);
}
//-----------------------------------------------------------------------

// Half pixel interpolation in the x direction
static inline void MC_avg_x_mmx (int width, int height,
                                 yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    mmx_zero_reg ();

    do {
        mmx_interp_average_2_U8 (dest, ref, ref+1);

        if (width == 16)
            mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);

        dest += stride;
        ref += stride;
    } while (--height);
}

static void MC_avg_x16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_avg_x_mmx (16, height, dest, ref, stride);
}

static void MC_avg_x8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_avg_x_mmx (8, height, dest, ref, stride);
}
//-----------------------------------------------------------------------

static inline void MC_put_x_mmx (int width, int height,
                                 yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, ref, ref+1);

        if (width == 16)
            mmx_average_2_U8 (dest+8, ref+8, ref+9);

        dest += stride;
        ref += stride;
    } while (--height);
}

static void MC_put_x16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_put_x_mmx (16, height, dest, ref, stride);
}

static void MC_put_x8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_put_x_mmx (8, height, dest, ref, stride);
}
//-----------------------------------------------------------------------

// Half pixel interpolation in both the x and y directions
static inline void MC_avg_xy_8wide_mmx (int height, yuv_data_t * dest,
                                        yuv_data_t * ref, int stride)
{
    movq_m2r (round4, mm7);
    mmx_zero_reg ();

    movq_m2r (*ref, mm1);         // calculate first row ref[0] + ref[1]
    movq_r2r (mm1, mm2);

    punpcklbw_r2r (mm0, mm1);
    punpckhbw_r2r (mm0, mm2);

    movq_m2r (*(ref+1), mm3);
    movq_r2r (mm3, mm4);

    punpcklbw_r2r (mm0, mm3);
    punpckhbw_r2r (mm0, mm4);

    paddw_r2r (mm3, mm1);
    paddw_r2r (mm4, mm2);

    ref += stride;

    do {
        movq_m2r (*ref, mm5);     // calculate next row ref[0] + ref[1]
        movq_r2r (mm5, mm6);

        punpcklbw_r2r (mm0, mm5);
        punpckhbw_r2r (mm0, mm6);

        movq_m2r (*(ref+1), mm3);
        movq_r2r (mm3, mm4);

        punpcklbw_r2r (mm0, mm3);
        punpckhbw_r2r (mm0, mm4);

        paddw_r2r (mm3, mm5);
        paddw_r2r (mm4, mm6);

        movq_r2r (mm7, mm3);      // calculate round4 + previous row + current row
        movq_r2r (mm7, mm4);

        paddw_r2r (mm1, mm3);
        paddw_r2r (mm2, mm4);

        paddw_r2r (mm5, mm3);
        paddw_r2r (mm6, mm4);

        psraw_i2r (2, mm3);       // /4
        psraw_i2r (2, mm4);       // /4

        movq_m2r (*dest, mm1);    // calculate (subtotal + dest[0] + round1) / 2
        movq_r2r (mm1, mm2);

        punpcklbw_r2r (mm0, mm1);
        punpckhbw_r2r (mm0, mm2);

        paddw_r2r (mm1, mm3);
        paddw_r2r (mm2, mm4);

        paddw_m2r (round1, mm3);
        paddw_m2r (round1, mm4);

        psraw_i2r (1, mm3);       // /2
        psraw_i2r (1, mm4);       // /2

        packuswb_r2r (mm4, mm3);  // pack (w/ saturation)
        movq_r2m (mm3, *dest);    // store result in dest

        movq_r2r (mm5, mm1);      // remember current row for the next pass
        movq_r2r (mm6, mm2);

        ref += stride;
        dest += stride;
    } while (--height);
}
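
/*
 * Illustrative scalar sketch of the row-reuse pipelining above (hypothetical
 * helper, not in the original): each row of horizontal sums ref[x] + ref[x+1]
 * is computed once and carried into the next pass instead of being reloaded.
 */
#if 0
static void scalar_avg_xy_8wide (int height, yuv_data_t * dest,
                                 yuv_data_t * ref, int stride)
{
    int i, prev[8], cur[8];
    for( i = 0; i < 8; i++ )
        prev[i] = ref[i] + ref[i+1];          // first row of horizontal sums
    ref += stride;
    do
    {
        for( i = 0; i < 8; i++ )
        {
            cur[i] = ref[i] + ref[i+1];       // current row, computed once
            dest[i] = (dest[i] + ((prev[i] + cur[i] + 2) >> 2) + 1) >> 1;
            prev[i] = cur[i];                 // reused on the next pass
        }
        ref += stride;
        dest += stride;
    } while( --height );
}
#endif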
static void MC_avg_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                             int stride, int height)
{
    MC_avg_xy_8wide_mmx(height, dest, ref, stride);
    MC_avg_xy_8wide_mmx(height, dest+8, ref+8, stride);
}

static void MC_avg_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_avg_xy_8wide_mmx(height, dest, ref, stride);
}
//-----------------------------------------------------------------------

static inline void MC_put_xy_8wide_mmx (int height, yuv_data_t * dest,
                                        yuv_data_t * ref, int stride)
{
    movq_m2r (round4, mm7);
    mmx_zero_reg ();

    movq_m2r (*ref, mm1);         // calculate first row ref[0] + ref[1]
    movq_r2r (mm1, mm2);

    punpcklbw_r2r (mm0, mm1);
    punpckhbw_r2r (mm0, mm2);

    movq_m2r (*(ref+1), mm3);
    movq_r2r (mm3, mm4);

    punpcklbw_r2r (mm0, mm3);
    punpckhbw_r2r (mm0, mm4);

    paddw_r2r (mm3, mm1);
    paddw_r2r (mm4, mm2);

    ref += stride;

    do {
        movq_m2r (*ref, mm5);     // calculate next row ref[0] + ref[1]
        movq_r2r (mm5, mm6);

        punpcklbw_r2r (mm0, mm5);
        punpckhbw_r2r (mm0, mm6);

        movq_m2r (*(ref+1), mm3);
        movq_r2r (mm3, mm4);

        punpcklbw_r2r (mm0, mm3);
        punpckhbw_r2r (mm0, mm4);

        paddw_r2r (mm3, mm5);
        paddw_r2r (mm4, mm6);

        movq_r2r (mm7, mm3);      // calculate round4 + previous row + current row
        movq_r2r (mm7, mm4);

        paddw_r2r (mm1, mm3);
        paddw_r2r (mm2, mm4);

        paddw_r2r (mm5, mm3);
        paddw_r2r (mm6, mm4);

        psraw_i2r (2, mm3);       // /4
        psraw_i2r (2, mm4);       // /4

        packuswb_r2r (mm4, mm3);  // pack (w/ saturation)
        movq_r2m (mm3, *dest);    // store result in dest

        movq_r2r (mm5, mm1);      // advance to the next row
        movq_r2r (mm6, mm2);

        ref += stride;
        dest += stride;
    } while (--height);
}
static void MC_put_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                             int stride, int height)
{
    MC_put_xy_8wide_mmx(height, dest, ref, stride);
    MC_put_xy_8wide_mmx(height, dest + 8, ref + 8, stride);
}

static void MC_put_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_put_xy_8wide_mmx(height, dest, ref, stride);
}
//-----------------------------------------------------------------------

// Half pixel interpolation in the y direction
static inline void MC_avg_y_mmx (int width, int height,
                                 yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    yuv_data_t * ref_next = ref+stride;

    mmx_zero_reg ();

    do {
        mmx_interp_average_2_U8 (dest, ref, ref_next);

        if (width == 16)
            mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);

        dest += stride;
        ref += stride;
        ref_next += stride;
    } while (--height);
}

static void MC_avg_y16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_avg_y_mmx (16, height, dest, ref, stride);
}

static void MC_avg_y8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_avg_y_mmx (8, height, dest, ref, stride);
}
//-----------------------------------------------------------------------

static inline void MC_put_y_mmx (int width, int height,
                                 yuv_data_t * dest, yuv_data_t * ref, int stride)
{
    yuv_data_t * ref_next = ref+stride;

    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, ref, ref_next);

        if (width == 16)
            mmx_average_2_U8 (dest+8, ref+8, ref_next+8);

        dest += stride;
        ref += stride;
        ref_next += stride;
    } while (--height);
}

static void MC_put_y16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                            int stride, int height)
{
    MC_put_y_mmx (16, height, dest, ref, stride);
}

static void MC_put_y8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                           int stride, int height)
{
    MC_put_y_mmx (8, height, dest, ref, stride);
}
/*****************************************************************************
 * Functions exported as capabilities. They are declared as static so that
 * we don't pollute the namespace too much.
 *****************************************************************************/
static void motion_getfunctions( function_list_t * p_function_list )
{
    static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
                                           int, int ) =
    {
        {
            /* Copying functions */
            {
                /* Width == 16 */
                MC_put_16_mmx, MC_put_x16_mmx, MC_put_y16_mmx, MC_put_xy16_mmx
            },
            {
                /* Width == 8 */
                MC_put_8_mmx, MC_put_x8_mmx, MC_put_y8_mmx, MC_put_xy8_mmx
            }
        },
        {
            /* Averaging functions */
            {
                /* Width == 16 */
                MC_avg_16_mmx, MC_avg_x16_mmx, MC_avg_y16_mmx, MC_avg_xy16_mmx
            },
            {
                /* Width == 8 */
                MC_avg_8_mmx, MC_avg_x8_mmx, MC_avg_y8_mmx, MC_avg_xy8_mmx
            }
        }
    };

#define list p_function_list->functions.motion
    memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );
#undef list
}
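
/*
 * Illustrative dispatch through the table (hypothetical caller, not part of
 * this file; the index meanings follow from the initializer above):
 * [b_average][b_8wide][i_select], with i_select = (b_y_half << 1) | b_x_half.
 */
#if 0
    void (* pf_mc)( yuv_data_t *, yuv_data_t *, int, int ) =
        ppppf_motion[1][0][(1 << 1) | 0];  // averaging, width 16, y half-pel
    pf_mc( p_dest, p_ref, i_stride, 16 );  // calls MC_avg_y16_mmx
#endif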