git.sesse.net Git - x264/blob - common/pixel.c

   1 /*****************************************************************************
   2  * pixel.c: h264 encoder
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: pixel.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include "common.h"
  25 #include "clip1.h"
  26
  27 #ifdef HAVE_MMX
  28 #   include "i386/pixel.h"
  29 #endif
  30 #ifdef ARCH_PPC
  31 #   include "ppc/pixel.h"
  32 #endif
  33 #ifdef ARCH_UltraSparc
  34 #   include "sparc/pixel.h"
  35 #endif
  36
  37
  38 /****************************************************************************
  39  * pixel_sad_WxH
  40  ****************************************************************************/
  41 #define PIXEL_SAD_C( name, lx, ly ) \
  42 static int name( uint8_t *pix1, int i_stride_pix1,  \
  43                  uint8_t *pix2, int i_stride_pix2 ) \
  44 {                                                   \
  45     int i_sum = 0;                                  \
  46     int x, y;                                       \
  47     for( y = 0; y < ly; y++ )                       \
  48     {                                               \
  49         for( x = 0; x < lx; x++ )                   \
  50         {                                           \
  51             i_sum += abs( pix1[x] - pix2[x] );      \
  52         }                                           \
  53         pix1 += i_stride_pix1;                      \
  54         pix2 += i_stride_pix2;                      \
  55     }                                               \
  56     return i_sum;                                   \
  57 }
  58
  59
  60 PIXEL_SAD_C( x264_pixel_sad_16x16, 16, 16 )
  61 PIXEL_SAD_C( x264_pixel_sad_16x8,  16,  8 )
  62 PIXEL_SAD_C( x264_pixel_sad_8x16,   8, 16 )
  63 PIXEL_SAD_C( x264_pixel_sad_8x8,    8,  8 )
  64 PIXEL_SAD_C( x264_pixel_sad_8x4,    8,  4 )
  65 PIXEL_SAD_C( x264_pixel_sad_4x8,    4,  8 )
  66 PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
  67
  68
  69 /****************************************************************************
  70  * pixel_ssd_WxH
  71  ****************************************************************************/
  72 #define PIXEL_SSD_C( name, lx, ly ) \
  73 static int name( uint8_t *pix1, int i_stride_pix1,  \
  74                  uint8_t *pix2, int i_stride_pix2 ) \
  75 {                                                   \
  76     int i_sum = 0;                                  \
  77     int x, y;                                       \
  78     for( y = 0; y < ly; y++ )                       \
  79     {                                               \
  80         for( x = 0; x < lx; x++ )                   \
  81         {                                           \
  82             int d = pix1[x] - pix2[x];              \
  83             i_sum += d*d;                           \
  84         }                                           \
  85         pix1 += i_stride_pix1;                      \
  86         pix2 += i_stride_pix2;                      \
  87     }                                               \
  88     return i_sum;                                   \
  89 }
  90
  91 PIXEL_SSD_C( x264_pixel_ssd_16x16, 16, 16 )
  92 PIXEL_SSD_C( x264_pixel_ssd_16x8,  16,  8 )
  93 PIXEL_SSD_C( x264_pixel_ssd_8x16,   8, 16 )
  94 PIXEL_SSD_C( x264_pixel_ssd_8x8,    8,  8 )
  95 PIXEL_SSD_C( x264_pixel_ssd_8x4,    8,  4 )
  96 PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
  97 PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )
  98
  99 int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
 100 {
 101     int64_t i_ssd = 0;
 102     int x, y;
 103
 104 #define SSD(size) i_ssd += pf->ssd[size]( pix1 + y*i_pix1 + x, i_pix1, \
 105                                           pix2 + y*i_pix2 + x, i_pix2 );
 106     for( y = 0; y < i_height-15; y += 16 )
 107     {
 108         for( x = 0; x < i_width-15; x += 16 )
 109             SSD(PIXEL_16x16);
 110         if( x < i_width-7 )
 111             SSD(PIXEL_8x16);
 112     }
 113     if( y < i_height-7 )
 114         for( x = 0; x < i_width-7; x += 8 )
 115             SSD(PIXEL_8x8);
 116 #undef SSD
 117
 118 #define SSD1 { int d = pix1[y*i_pix1+x] - pix2[y*i_pix2+x]; i_ssd += d*d; }
 119     if( i_width % 8 != 0 )
 120     {
 121         for( y = 0; y < (i_height & ~7); y++ )
 122             for( x = i_width & ~7; x < i_width; x++ )
 123                 SSD1;
 124     }
 125     if( i_height % 8 != 0 )
 126     {
 127         for( y = i_height & ~7; y < i_height; y++ )
 128             for( x = 0; x < i_width; x++ )
 129                 SSD1;
 130     }
 131 #undef SSD1
 132
 133     return i_ssd;
 134 }
 135
 136
 137 static inline void pixel_sub_wxh( int16_t *diff, int i_size,
 138                                   uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 139 {
 140     int y, x;
 141     for( y = 0; y < i_size; y++ )
 142     {
 143         for( x = 0; x < i_size; x++ )
 144         {
 145             diff[x + y*i_size] = pix1[x] - pix2[x];
 146         }
 147         pix1 += i_pix1;
 148         pix2 += i_pix2;
 149     }
 150 }
 151
 152
 153 /****************************************************************************
 154  * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
 155  ****************************************************************************/
 156 static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
 157 {
 158     int16_t tmp[4][4];
 159     int16_t diff[4][4];
 160     int x, y;
 161     int i_satd = 0;
 162
 163     for( y = 0; y < i_height; y += 4 )
 164     {
 165         for( x = 0; x < i_width; x += 4 )
 166         {
 167             int d;
 168
 169             pixel_sub_wxh( (int16_t*)diff, 4, &pix1[x], i_pix1, &pix2[x], i_pix2 );
 170
 171             for( d = 0; d < 4; d++ )
 172             {
 173                 int s01, s23;
 174                 int d01, d23;
 175
 176                 s01 = diff[d][0] + diff[d][1]; s23 = diff[d][2] + diff[d][3];
 177                 d01 = diff[d][0] - diff[d][1]; d23 = diff[d][2] - diff[d][3];
 178
 179                 tmp[d][0] = s01 + s23;
 180                 tmp[d][1] = s01 - s23;
 181                 tmp[d][2] = d01 - d23;
 182                 tmp[d][3] = d01 + d23;
 183             }
 184             for( d = 0; d < 4; d++ )
 185             {
 186                 int s01, s23;
 187                 int d01, d23;
 188
 189                 s01 = tmp[0][d] + tmp[1][d]; s23 = tmp[2][d] + tmp[3][d];
 190                 d01 = tmp[0][d] - tmp[1][d]; d23 = tmp[2][d] - tmp[3][d];
 191
 192                 i_satd += abs( s01 + s23 ) + abs( s01 - s23 ) + abs( d01 - d23 ) + abs( d01 + d23 );
 193             }
 194
 195         }
 196         pix1 += 4 * i_pix1;
 197         pix2 += 4 * i_pix2;
 198     }
 199
 200     return i_satd / 2;
 201 }
 202 #define PIXEL_SATD_C( name, width, height ) \
 203 static int name( uint8_t *pix1, int i_stride_pix1, \
 204                  uint8_t *pix2, int i_stride_pix2 ) \
 205 { \
 206     return pixel_satd_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
 207 }
 208 PIXEL_SATD_C( x264_pixel_satd_16x16, 16, 16 )
 209 PIXEL_SATD_C( x264_pixel_satd_16x8,  16, 8 )
 210 PIXEL_SATD_C( x264_pixel_satd_8x16,  8, 16 )
 211 PIXEL_SATD_C( x264_pixel_satd_8x8,   8, 8 )
 212 PIXEL_SATD_C( x264_pixel_satd_8x4,   8, 4 )
 213 PIXEL_SATD_C( x264_pixel_satd_4x8,   4, 8 )
 214 PIXEL_SATD_C( x264_pixel_satd_4x4,   4, 4 )
 215
 216
 217 /****************************************************************************
 218  * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
 219  ****************************************************************************/
 220 #define SA8D_1D {\
 221     const int a0 = SRC(0) + SRC(4);\
 222     const int a4 = SRC(0) - SRC(4);\
 223     const int a1 = SRC(1) + SRC(5);\
 224     const int a5 = SRC(1) - SRC(5);\
 225     const int a2 = SRC(2) + SRC(6);\
 226     const int a6 = SRC(2) - SRC(6);\
 227     const int a3 = SRC(3) + SRC(7);\
 228     const int a7 = SRC(3) - SRC(7);\
 229     const int b0 = a0 + a2;\
 230     const int b2 = a0 - a2;\
 231     const int b1 = a1 + a3;\
 232     const int b3 = a1 - a3;\
 233     const int b4 = a4 + a6;\
 234     const int b6 = a4 - a6;\
 235     const int b5 = a5 + a7;\
 236     const int b7 = a5 - a7;\
 237     DST(0, b0 + b1);\
 238     DST(1, b0 - b1);\
 239     DST(2, b2 + b3);\
 240     DST(3, b2 - b3);\
 241     DST(4, b4 + b5);\
 242     DST(5, b4 - b5);\
 243     DST(6, b6 + b7);\
 244     DST(7, b6 - b7);\
 245 }
 246
 247 static inline int pixel_sa8d_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2,
 248                                   int i_width, int i_height )
 249 {
 250     int16_t diff[8][8];
 251     int i_satd = 0;
 252     int x, y;
 253
 254     for( y = 0; y < i_height; y += 8 )
 255     {
 256         for( x = 0; x < i_width; x += 8 )
 257         {
 258             int i;
 259             pixel_sub_wxh( (int16_t*)diff, 8, pix1+x, i_pix1, pix2+x, i_pix2 );
 260
 261 #define SRC(x)     diff[i][x]
 262 #define DST(x,rhs) diff[i][x] = (rhs)
 263             for( i = 0; i < 8; i++ )
 264                 SA8D_1D
 265 #undef SRC
 266 #undef DST
 267
 268 #define SRC(x)     diff[x][i]
 269 #define DST(x,rhs) i_satd += abs(rhs)
 270             for( i = 0; i < 8; i++ )
 271                 SA8D_1D
 272 #undef SRC
 273 #undef DST
 274         }
 275         pix1 += 8 * i_pix1;
 276         pix2 += 8 * i_pix2;
 277     }
 278
 279     return i_satd;
 280 }
 281
 282 #define PIXEL_SA8D_C( width, height ) \
 283 static int x264_pixel_sa8d_##width##x##height( uint8_t *pix1, int i_stride_pix1, \
 284                                                uint8_t *pix2, int i_stride_pix2 ) \
 285 { \
 286     return ( pixel_sa8d_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ) + 2 ) >> 2; \
 287 }
 288 PIXEL_SA8D_C( 16, 16 )
 289 PIXEL_SA8D_C( 16, 8 )
 290 PIXEL_SA8D_C( 8, 16 )
 291 PIXEL_SA8D_C( 8, 8 )
 292
 293 /****************************************************************************
 294  * pixel_sad_x4
 295  ****************************************************************************/
 296 #define SAD_X( size ) \
 297 static void x264_pixel_sad_x3_##size( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )\
 298 {\
 299     scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
 300     scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
 301     scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
 302 }\
 303 static void x264_pixel_sad_x4_##size( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )\
 304 {\
 305     scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
 306     scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
 307     scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
 308     scores[3] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix3, i_stride );\
 309 }
 310
 311 SAD_X( 16x16 )
 312 SAD_X( 16x8 )
 313 SAD_X( 8x16 )
 314 SAD_X( 8x8 )
 315 SAD_X( 8x4 )
 316 SAD_X( 4x8 )
 317 SAD_X( 4x4 )
 318
 319 #ifdef ARCH_UltraSparc
 320 SAD_X( 16x16_vis )
 321 SAD_X( 16x8_vis )
 322 SAD_X( 8x16_vis )
 323 SAD_X( 8x8_vis )
 324 #endif
 325
 326 /****************************************************************************
 327  * structural similarity metric
 328  ****************************************************************************/
 329 static void ssim_4x4x2_core( const uint8_t *pix1, int stride1,
 330                              const uint8_t *pix2, int stride2,
 331                              int sums[2][4])
 332 {
 333     int x, y, z;
 334     for(z=0; z<2; z++)
 335     {
 336         uint32_t s1=0, s2=0, ss=0, s12=0;
 337         for(y=0; y<4; y++)
 338             for(x=0; x<4; x++)
 339             {
 340                 int a = pix1[x+y*stride1];
 341                 int b = pix2[x+y*stride2];
 342                 s1  += a;
 343                 s2  += b;
 344                 ss  += a*a;
 345                 ss  += b*b;
 346                 s12 += a*b;
 347             }
 348         sums[z][0] = s1;
 349         sums[z][1] = s2;
 350         sums[z][2] = ss;
 351         sums[z][3] = s12;
 352         pix1 += 4;
 353         pix2 += 4;
 354     }
 355 }
 356
 357 static float ssim_end1( int s1, int s2, int ss, int s12 )
 358 {
 359     static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
 360     static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
 361     int vars = ss*64 - s1*s1 - s2*s2;
 362     int covar = s12*64 - s1*s2;
 363     return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)\
 364            / ((float)(s1*s1 + s2*s2 + ssim_c1) * (float)(vars + ssim_c2));
 365 }
 366
 367 static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
 368 {
 369     int i;
 370     float ssim = 0.0;
 371     for( i = 0; i < width; i++ )
 372         ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
 373                            sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
 374                            sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
 375                            sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
 376     return ssim;
 377 }
 378
 379 float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
 380                            uint8_t *pix1, int stride1,
 381                            uint8_t *pix2, int stride2,
 382                            int width, int height )
 383 {
 384     int x, y, z;
 385     float ssim = 0.0;
 386     int (*sum0)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));
 387     int (*sum1)[4] = x264_malloc(4 * (width/4+3) * sizeof(int));
 388     width >>= 2;
 389     height >>= 2;
 390     z = 0;
 391     for( y = 1; y < height; y++ )
 392     {
 393         for( ; z <= y; z++ )
 394         {
 395             XCHG( void*, sum0, sum1 );
 396             for( x = 0; x < width; x+=2 )
 397                 pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
 398         }
 399         for( x = 0; x < width-1; x += 4 )
 400             ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
 401     }
 402     x264_free(sum0);
 403     x264_free(sum1);
 404     return ssim / ((width-1) * (height-1));
 405 }
 406
 407
 408 /****************************************************************************
 409  * successive elimination
 410  ****************************************************************************/
 411 static void pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
 412                         uint16_t *res, int width )
 413 {
 414     int i;
 415     for( i=0; i<width; i++, sums++ )
 416         res[i] = abs( enc_dc[0] - sums[0] )
 417                + abs( enc_dc[1] - sums[8] )
 418                + abs( enc_dc[2] - sums[delta] )
 419                + abs( enc_dc[3] - sums[delta+8] );
 420 }
 421
 422 static void pixel_ads2( int enc_dc[2], uint16_t *sums, int delta,
 423                         uint16_t *res, int width )
 424 {
 425     int i;
 426     for( i=0; i<width; i++, sums++ )
 427         res[i] = abs( enc_dc[0] - sums[0] )
 428                + abs( enc_dc[1] - sums[delta] );
 429 }
 430
 431 static void pixel_ads1( int enc_dc[1], uint16_t *sums, int delta,
 432                         uint16_t *res, int width )
 433 {
 434     int i;
 435     for( i=0; i<width; i++, sums++ )
 436         res[i] = abs( enc_dc[0] - sums[0] );
 437 }
 438
 439
 440 /****************************************************************************
 441  * x264_pixel_init:
 442  ****************************************************************************/
 443 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
 444 {
 445     memset( pixf, 0, sizeof(*pixf) );
 446
 447 #define INIT( name, cpu ) \
 448     pixf->name[PIXEL_16x16] = x264_pixel_##name##_16x16##cpu;\
 449     pixf->name[PIXEL_16x8]  = x264_pixel_##name##_16x8##cpu;\
 450     pixf->name[PIXEL_8x16]  = x264_pixel_##name##_8x16##cpu;\
 451     pixf->name[PIXEL_8x8]   = x264_pixel_##name##_8x8##cpu;\
 452     pixf->name[PIXEL_8x4]   = x264_pixel_##name##_8x4##cpu;\
 453     pixf->name[PIXEL_4x8]   = x264_pixel_##name##_4x8##cpu;\
 454     pixf->name[PIXEL_4x4]   = x264_pixel_##name##_4x4##cpu;
 455
 456     INIT( sad, );
 457     INIT( sad_x3, );
 458     INIT( sad_x4, );
 459     INIT( ssd, );
 460     INIT( satd, );
 461
 462     pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16;
 463     pixf->sa8d[PIXEL_16x8] = x264_pixel_sa8d_16x8;
 464     pixf->sa8d[PIXEL_8x16] = x264_pixel_sa8d_8x16;
 465     pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8;
 466     pixf->ssim_4x4x2_core = ssim_4x4x2_core;
 467     pixf->ssim_end4 = ssim_end4;
 468
 469     pixf->ads[PIXEL_16x16] = pixel_ads4;
 470     pixf->ads[PIXEL_16x8] = pixel_ads2;
 471     pixf->ads[PIXEL_8x8] = pixel_ads1;
 472
 473 #ifdef HAVE_MMX
 474     if( cpu&X264_CPU_MMX )
 475     {
 476         INIT( ssd, _mmx );
 477     }
 478
 479     if( cpu&X264_CPU_MMXEXT )
 480     {
 481         INIT( sad, _mmxext );
 482         INIT( sad_x3, _mmxext );
 483         INIT( sad_x4, _mmxext );
 484         INIT( satd, _mmxext );
 485
 486         pixf->sad_pde[PIXEL_16x16] = x264_pixel_sad_pde_16x16_mmxext;
 487         pixf->sad_pde[PIXEL_16x8 ] = x264_pixel_sad_pde_16x8_mmxext;
 488         pixf->sad_pde[PIXEL_8x16 ] = x264_pixel_sad_pde_8x16_mmxext;
 489
 490         pixf->ads[PIXEL_16x16] = x264_pixel_ads4_mmxext;
 491         pixf->ads[PIXEL_16x8 ] = x264_pixel_ads2_mmxext;
 492         pixf->ads[PIXEL_8x8  ] = x264_pixel_ads1_mmxext;
 493
 494 #ifdef ARCH_X86
 495         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
 496         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_mmxext;
 497         pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
 498         pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_mmxext;
 499 #endif
 500         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
 501         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmxext;
 502         pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmxext;
 503     }
 504
 505     // disable on AMD processors since it is slower
 506     if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
 507     {
 508         pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_sse2;
 509         pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_sse2;
 510
 511         pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_sse2;
 512         pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_sse2;
 513         pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_sse2;
 514         pixf->satd[PIXEL_8x8]  = x264_pixel_satd_8x8_sse2;
 515         pixf->satd[PIXEL_8x4]  = x264_pixel_satd_8x4_sse2;
 516
 517 #ifdef ARCH_X86
 518         pixf->sad_x3[PIXEL_16x16] = x264_pixel_sad_x3_16x16_sse2;
 519         pixf->sad_x3[PIXEL_16x8 ] = x264_pixel_sad_x3_16x8_sse2;
 520
 521         pixf->sad_x4[PIXEL_16x16] = x264_pixel_sad_x4_16x16_sse2;
 522         pixf->sad_x4[PIXEL_16x8 ] = x264_pixel_sad_x4_16x8_sse2;
 523 #endif
 524     }
 525     // these are faster on both Intel and AMD
 526     if( cpu&X264_CPU_SSE2 )
 527     {
 528         pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_sse2;
 529         pixf->ssd[PIXEL_16x8]  = x264_pixel_ssd_16x8_sse2;
 530         pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
 531         pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
 532
 533 #ifdef ARCH_X86_64
 534         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
 535         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
 536         pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
 537 #endif
 538     }
 539
 540     if( cpu&X264_CPU_SSSE3 )
 541     {
 542 #ifdef HAVE_SSE3
 543         pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
 544         pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
 545         pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
 546         pixf->satd[PIXEL_8x8]  = x264_pixel_satd_8x8_ssse3;
 547         pixf->satd[PIXEL_8x4]  = x264_pixel_satd_8x4_ssse3;
 548 #ifdef ARCH_X86_64
 549         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
 550         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
 551 #endif
 552 #endif
 553     }
 554 #endif //HAVE_MMX
 555
 556 #ifdef ARCH_PPC
 557     if( cpu&X264_CPU_ALTIVEC )
 558     {
 559         x264_pixel_altivec_init( pixf );
 560     }
 561 #endif
 562 #ifdef ARCH_UltraSparc
 563     pixf->sad[PIXEL_8x8]   = x264_pixel_sad_8x8_vis;
 564     pixf->sad[PIXEL_8x16]  = x264_pixel_sad_8x16_vis;
 565     pixf->sad[PIXEL_16x8]  = x264_pixel_sad_16x8_vis;
 566     pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_vis;
 567
 568     pixf->sad_x3[PIXEL_8x8]   = x264_pixel_sad_x3_8x8_vis;
 569     pixf->sad_x3[PIXEL_8x16]  = x264_pixel_sad_x3_8x16_vis;
 570     pixf->sad_x3[PIXEL_16x8]  = x264_pixel_sad_x3_16x8_vis;
 571     pixf->sad_x3[PIXEL_16x16] = x264_pixel_sad_x3_16x16_vis;
 572
 573     pixf->sad_x4[PIXEL_8x8]   = x264_pixel_sad_x4_8x8_vis;
 574     pixf->sad_x4[PIXEL_8x16]  = x264_pixel_sad_x4_8x16_vis;
 575     pixf->sad_x4[PIXEL_16x8]  = x264_pixel_sad_x4_16x8_vis;
 576     pixf->sad_x4[PIXEL_16x16] = x264_pixel_sad_x4_16x16_vis;
 577 #endif
 578
 579     pixf->ads[PIXEL_8x16] =
 580     pixf->ads[PIXEL_8x4] =
 581     pixf->ads[PIXEL_4x8] = pixf->ads[PIXEL_16x8];
 582     pixf->ads[PIXEL_4x4] = pixf->ads[PIXEL_8x8];
 583 }
 584