/*****************************************************************************
 * predict.c: h264 encoder
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *****************************************************************************/
24 #include "common/common.h"
28 extern void predict_16x16_v_mmx( uint8_t *src );
29 extern void predict_16x16_h_mmxext( uint8_t *src );
30 extern void predict_16x16_h_ssse3( uint8_t *src );
31 extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left );
32 extern void predict_16x16_dc_left_core_mmxext( uint8_t *src, int i_dc_left );
33 extern void predict_16x16_dc_top_mmxext( uint8_t *src );
34 extern void predict_16x16_p_core_mmxext( uint8_t *src, int i00, int b, int c );
35 extern void predict_8x8c_p_core_mmxext( uint8_t *src, int i00, int b, int c );
36 extern void predict_8x8c_p_core_sse2( uint8_t *src, int i00, int b, int c );
37 extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 );
38 extern void predict_8x8c_dc_top_mmxext( uint8_t *src );
39 extern void predict_8x8c_v_mmx( uint8_t *src );
40 extern void predict_8x8c_h_mmxext( uint8_t *src );
41 extern void predict_8x8c_h_ssse3( uint8_t *src );
42 extern void predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
43 extern void predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
44 extern void predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
45 extern void predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
46 extern void predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
47 extern void predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t edge[33] );
48 extern void predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t edge[33] );
49 extern void predict_8x8_ddl_mmxext( uint8_t *src, uint8_t edge[33] );
50 extern void predict_8x8_ddr_mmxext( uint8_t *src, uint8_t edge[33] );
51 extern void predict_8x8_ddl_sse2( uint8_t *src, uint8_t edge[33] );
52 extern void predict_8x8_ddr_sse2( uint8_t *src, uint8_t edge[33] );
53 extern void predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
54 extern void predict_8x8_vr_sse2( uint8_t *src, uint8_t edge[33] );
55 extern void predict_8x8_hu_sse2( uint8_t *src, uint8_t edge[33] );
56 extern void predict_8x8_hd_sse2( uint8_t *src, uint8_t edge[33] );
57 extern void predict_8x8_vr_core_mmxext( uint8_t *src, uint8_t edge[33] );
58 extern void predict_8x8_hd_ssse3( uint8_t *src, uint8_t edge[33] );
59 extern void predict_8x8_hu_ssse3( uint8_t *src, uint8_t edge[33] );
60 extern void predict_8x8_filter_mmxext ( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
61 extern void predict_8x8_filter_ssse3 ( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
62 extern void predict_4x4_ddl_mmxext( uint8_t *src );
63 extern void predict_4x4_ddr_mmxext( uint8_t *src );
64 extern void predict_4x4_vl_mmxext( uint8_t *src );
65 extern void predict_4x4_vr_mmxext( uint8_t *src );
66 extern void predict_4x4_vr_ssse3( uint8_t *src );
67 extern void predict_4x4_hd_mmxext( uint8_t *src );
68 extern void predict_4x4_hd_ssse3( uint8_t *src );
69 extern void predict_4x4_dc_mmxext( uint8_t *src );
70 extern void predict_4x4_ddr_ssse3( uint8_t *src );
71 extern void predict_4x4_hu_mmxext( uint8_t *src );
72 extern void predict_16x16_dc_top_sse2( uint8_t *src );
73 extern void predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
74 extern void predict_16x16_dc_left_core_sse2( uint8_t *src, int i_dc_left );
75 extern void predict_16x16_v_sse2( uint8_t *src );
76 extern void predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
78 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
79 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
80 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
/* Accumulate one weighted term of the plane-fit gradients:
 * H from the top row (centred on column j), V from the left column
 * (centred on row j).  Caller must declare int H, V.
 * NOTE(review): the original last line ended in a stray '\' continuation;
 * dropped here so the macro cannot swallow the following source line. */
#define PREDICT_P_SUM(j,i)\
    H += i * ( src[j+i - FDEC_STRIDE ] - src[j-i - FDEC_STRIDE ] );\
    V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );
86 #define PREDICT_16x16_P(name)\
87 static void predict_16x16_p_##name( uint8_t *src )\
101 a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
102 b = ( 5 * H + 32 ) >> 6;\
103 c = ( 5 * V + 32 ) >> 6;\
104 i00 = a - b * 7 - c * 7 + 16;\
105 predict_16x16_p_core_##name( src, i00, b, c );\
109 PREDICT_16x16_P( mmxext )
111 PREDICT_16x16_P( sse2 )
114 static void predict_16x16_p_ssse3( uint8_t *src )
120 "movq 8+%1, %%mm0 \n"
121 "palignr $7, -8+%1, %%mm1 \n"
122 "pmaddubsw %2, %%mm0 \n"
123 "pmaddubsw %3, %%mm1 \n"
124 "paddw %%mm1, %%mm0 \n"
125 "pshufw $14, %%mm0, %%mm1 \n"
126 "paddw %%mm1, %%mm0 \n"
127 "pshufw $1, %%mm0, %%mm1 \n"
128 "paddw %%mm1, %%mm0 \n"
132 :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321)
134 V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )
135 + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )
136 + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )
137 + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )
138 + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )
139 + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )
140 + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )
141 + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
142 a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );
143 b = ( 5 * H + 32 ) >> 6;
144 c = ( 5 * V + 32 ) >> 6;
145 i00 = a - b * 7 - c * 7 + 16;
146 predict_16x16_p_core_sse2( src, i00, b, c );
150 #define PREDICT_8x8_P(name)\
151 static void predict_8x8c_p_##name( uint8_t *src )\
161 a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
162 b = ( 17 * H + 16 ) >> 5;\
163 c = ( 17 * V + 16 ) >> 5;\
164 i00 = a -3*b -3*c + 16;\
165 predict_8x8c_p_core_##name( src, i00, b, c );\
169 PREDICT_8x8_P( mmxext )
171 PREDICT_8x8_P( sse2 )
174 static void predict_8x8c_p_ssse3( uint8_t *src )
180 "pmaddubsw %2, %%mm0 \n"
181 "pshufw $14, %%mm0, %%mm1 \n"
182 "paddw %%mm1, %%mm0 \n"
183 "pshufw $1, %%mm0, %%mm1 \n"
184 "paddw %%mm1, %%mm0 \n"
188 :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
190 V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )
191 + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )
192 + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )
193 + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );
194 H += -4 * src[-1*FDEC_STRIDE -1];
195 a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );
196 b = ( 17 * H + 16 ) >> 5;
197 c = ( 17 * V + 16 ) >> 5;
198 i00 = a -3*b -3*c + 16;
199 predict_8x8c_p_core_sse2( src, i00, b, c );
203 #define PREDICT_16x16_DC(name)\
204 static void predict_16x16_dc_##name( uint8_t *src )\
208 for( i = 0; i < 16; i+=2 )\
210 dc += src[-1 + i * FDEC_STRIDE];\
211 dc += src[-1 + (i+1) * FDEC_STRIDE];\
213 predict_16x16_dc_core_##name( src, dc );\
216 PREDICT_16x16_DC( mmxext )
217 PREDICT_16x16_DC( sse2 )
219 #define PREDICT_16x16_DC_LEFT(name)\
220 static void predict_16x16_dc_left_##name( uint8_t *src )\
224 for( i = 0; i < 16; i+=2 )\
226 dc += src[-1 + i * FDEC_STRIDE];\
227 dc += src[-1 + (i+1) * FDEC_STRIDE];\
229 predict_16x16_dc_left_core_##name( src, dc>>4 );\
232 PREDICT_16x16_DC_LEFT( mmxext )
233 PREDICT_16x16_DC_LEFT( sse2 )
235 static void predict_8x8c_dc_mmxext( uint8_t *src )
238 + src[-1 + 0*FDEC_STRIDE]
239 + src[-1 + 1*FDEC_STRIDE]
240 + src[-1 + 2*FDEC_STRIDE]
241 + src[-1 + 3*FDEC_STRIDE];
244 + src[-1 + 4*FDEC_STRIDE]
245 + src[-1 + 5*FDEC_STRIDE]
246 + src[-1 + 6*FDEC_STRIDE]
247 + src[-1 + 7*FDEC_STRIDE];
249 predict_8x8c_dc_core_mmxext( src, s2, s3 );
253 static void predict_8x8c_dc_left( uint8_t *src )
256 uint32_t s0 = 0, s1 = 0;
259 for( y = 0; y < 4; y++ )
261 s0 += src[y * FDEC_STRIDE - 1];
262 s1 += src[(y+4) * FDEC_STRIDE - 1];
264 dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL;
265 dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL;
267 for( y = 0; y < 4; y++ )
272 for( y = 0; y < 4; y++ )
/****************************************************************************
 * 8x8 prediction for intra luma block
 ****************************************************************************/

/* Load one left (l0..l7) or top (t0..t7) neighbour pixel from the filtered
 * edge buffer.  From the indices used here: edge[14-y] holds the left
 * column (l0 nearest the top-left), edge[15] the top-left corner, and
 * edge[16+x] the top row -- TODO confirm against predict_8x8_filter.
 * NOTE(review): the PL/PT/#define heads and the PREDICT_8x8_DC store
 * lines were missing from this listing; restored. */
#define PL(y) \
    UNUSED int l##y = edge[14-y];
#define PT(x) \
    UNUSED int t##x = edge[16+x];
#define PREDICT_8x8_LOAD_TOPLEFT \
    int lt = edge[15];
#define PREDICT_8x8_LOAD_LEFT \
    PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7)
#define PREDICT_8x8_LOAD_TOP \
    PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7)

/* Fill the whole 8x8 block with the (already byte-replicated) value v,
 * two 32-bit stores per row. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += FDEC_STRIDE; \
    }

/* Pixel accessor inside the decoded-frame scratch block. */
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
307 static void predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] )
309 predict_8x8_vr_core_mmxext( src, edge );
311 PREDICT_8x8_LOAD_TOPLEFT
312 PREDICT_8x8_LOAD_LEFT
313 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
314 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
315 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
316 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
317 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
318 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
323 #define SUMSUB(a,b,c,d,e,f,g,h)\
329 #define INTRA_SA8D_X3(cpu) \
330 void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
332 PREDICT_8x8_LOAD_TOP\
333 PREDICT_8x8_LOAD_LEFT\
335 ALIGNED_16( int16_t sa8d_1d[2][8] );\
336 SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
337 SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
338 SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
347 SUMSUB(t0,t4,t1,t5,t2,t6,t3,t7);\
348 SUMSUB(t0,t2,t1,t3,t4,t6,t5,t7);\
349 SUMSUB(t0,t1,t2,t3,t4,t5,t6,t7);\
358 x264_intra_sa8d_x3_8x8_core_##cpu( fenc, sa8d_1d, res );\
365 INTRA_SA8D_X3(mmxext)
368 /****************************************************************************
369 * Exported functions:
370 ****************************************************************************/
371 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
373 if( !(cpu&X264_CPU_MMX) )
375 pf[I_PRED_16x16_V] = predict_16x16_v_mmx;
376 if( !(cpu&X264_CPU_MMXEXT) )
378 pf[I_PRED_16x16_DC] = predict_16x16_dc_mmxext;
379 pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext;
380 pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_mmxext;
382 pf[I_PRED_16x16_P] = predict_16x16_p_mmxext;
384 pf[I_PRED_16x16_H] = predict_16x16_h_mmxext;
385 if( !(cpu&X264_CPU_SSE2) )
387 pf[I_PRED_16x16_DC] = predict_16x16_dc_sse2;
388 pf[I_PRED_16x16_V] = predict_16x16_v_sse2;
389 if( cpu&X264_CPU_SSE2_IS_SLOW )
391 pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2;
392 pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_sse2;
393 pf[I_PRED_16x16_P] = predict_16x16_p_sse2;
394 if( !(cpu&X264_CPU_SSSE3) )
396 pf[I_PRED_16x16_H] = predict_16x16_h_ssse3;
398 pf[I_PRED_16x16_P] = predict_16x16_p_ssse3;
402 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
404 if( !(cpu&X264_CPU_MMX) )
407 pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left;
409 pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx;
410 if( !(cpu&X264_CPU_MMXEXT) )
412 pf[I_PRED_CHROMA_DC_TOP] = predict_8x8c_dc_top_mmxext;
413 pf[I_PRED_CHROMA_H] = predict_8x8c_h_mmxext;
415 pf[I_PRED_CHROMA_P] = predict_8x8c_p_mmxext;
417 pf[I_PRED_CHROMA_DC] = predict_8x8c_dc_mmxext;
418 if( !(cpu&X264_CPU_SSE2) )
420 pf[I_PRED_CHROMA_P] = predict_8x8c_p_sse2;
421 if( !(cpu&X264_CPU_SSSE3) )
423 pf[I_PRED_CHROMA_H] = predict_8x8c_h_ssse3;
425 pf[I_PRED_CHROMA_P] = predict_8x8c_p_ssse3;
429 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
431 if( !(cpu&X264_CPU_MMXEXT) )
433 pf[I_PRED_8x8_V] = predict_8x8_v_mmxext;
434 pf[I_PRED_8x8_H] = predict_8x8_h_mmxext;
435 pf[I_PRED_8x8_DC] = predict_8x8_dc_mmxext;
436 pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_mmxext;
437 pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_mmxext;
438 pf[I_PRED_8x8_HD] = predict_8x8_hd_mmxext;
439 *predict_8x8_filter = predict_8x8_filter_mmxext;
441 pf[I_PRED_8x8_DDL] = predict_8x8_ddl_mmxext;
442 pf[I_PRED_8x8_DDR] = predict_8x8_ddr_mmxext;
443 pf[I_PRED_8x8_VR] = predict_8x8_vr_mmxext;
444 pf[I_PRED_8x8_HU] = predict_8x8_hu_mmxext;
446 if( !(cpu&X264_CPU_SSE2) )
448 pf[I_PRED_8x8_DDL] = predict_8x8_ddl_sse2;
449 pf[I_PRED_8x8_VL] = predict_8x8_vl_sse2;
450 pf[I_PRED_8x8_VR] = predict_8x8_vr_sse2;
451 pf[I_PRED_8x8_DDR] = predict_8x8_ddr_sse2;
452 pf[I_PRED_8x8_HD] = predict_8x8_hd_sse2;
453 pf[I_PRED_8x8_HU] = predict_8x8_hu_sse2;
454 if( !(cpu&X264_CPU_SSSE3) )
456 pf[I_PRED_8x8_HD] = predict_8x8_hd_ssse3;
457 pf[I_PRED_8x8_HU] = predict_8x8_hu_ssse3;
458 *predict_8x8_filter = predict_8x8_filter_ssse3;
461 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
463 if( !(cpu&X264_CPU_MMXEXT) )
465 pf[I_PRED_4x4_VR] = predict_4x4_vr_mmxext;
466 pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
467 pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext;
468 pf[I_PRED_4x4_DC] = predict_4x4_dc_mmxext;
469 pf[I_PRED_4x4_DDR] = predict_4x4_ddr_mmxext;
470 pf[I_PRED_4x4_HD] = predict_4x4_hd_mmxext;
471 pf[I_PRED_4x4_HU] = predict_4x4_hu_mmxext;
472 if( !(cpu&X264_CPU_SSSE3) )
474 pf[I_PRED_4x4_DDR] = predict_4x4_ddr_ssse3;
475 pf[I_PRED_4x4_VR] = predict_4x4_vr_ssse3;
476 pf[I_PRED_4x4_HD] = predict_4x4_hd_ssse3;