1 /*****************************************************************************
2 * predict-c.c: intra prediction
3 *****************************************************************************
4 * Copyright (C) 2003-2011 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
24 * This program is also available under a commercial proprietary license.
25 * For more information, contact us at licensing@x264.com.
26 *****************************************************************************/
28 #include "common/common.h"
/* Prototypes for the 16x16 intra prediction routines.
 * Only declarations appear here; no bodies for these names are visible in
 * this file (NOTE(review): presumably implemented in assembly - confirm).
 * The *_dc_core_*, *_dc_left_core_* and *_p_core_* entries take extra integer
 * arguments that the C wrappers defined later in this file compute before
 * calling them. */
32 void x264_predict_16x16_v_mmx( pixel *src );
33 void x264_predict_16x16_v_sse2( pixel *src );
34 void x264_predict_16x16_h_mmxext( pixel *src );
35 void x264_predict_16x16_h_sse2( uint16_t *src );
36 void x264_predict_16x16_h_ssse3( uint8_t *src );
37 void x264_predict_16x16_dc_core_mmxext( pixel *src, int i_dc_left );
38 void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left );
39 void x264_predict_16x16_dc_left_core_mmxext( pixel *src, int i_dc_left );
40 void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left );
41 void x264_predict_16x16_dc_top_mmxext( pixel *src );
42 void x264_predict_16x16_dc_top_sse2( pixel *src );
43 void x264_predict_16x16_dc_top_ssse3( uint16_t *src );
44 void x264_predict_16x16_p_core_mmxext( uint8_t *src, int i00, int b, int c );
45 void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c );
/* Prototypes for the 8x8 chroma ("8x8c") intra prediction routines.
 * Only declarations appear here. The *_p_core_* entries receive the values
 * i00, b and c that the plane-prediction wrappers later in this file compute. */
46 void x264_predict_8x8c_p_core_mmxext( uint8_t *src, int i00, int b, int c );
47 void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c );
48 void x264_predict_8x8c_dc_mmxext( pixel *src );
49 void x264_predict_8x8c_dc_sse2( uint16_t *src );
50 void x264_predict_8x8c_dc_top_mmxext( uint8_t *src );
51 void x264_predict_8x8c_dc_top_sse2( uint16_t *src );
52 void x264_predict_8x8c_v_mmx( pixel *src );
53 void x264_predict_8x8c_v_sse2( uint16_t *src );
54 void x264_predict_8x8c_h_mmxext( uint8_t *src );
55 void x264_predict_8x8c_h_sse2( pixel *src );
56 void x264_predict_8x8c_h_ssse3( uint8_t *src );
/* Prototypes for the 8x8 intra prediction routines and the 8x8 edge filters.
 * Every predictor takes the destination pointer plus a 33-entry edge array;
 * the filter variants additionally take i_neighbor and i_filters.
 * Only declarations appear here. */
57 void x264_predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
58 void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[33] );
59 void x264_predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
60 void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[33] );
61 void x264_predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
62 void x264_predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
63 void x264_predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
64 void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[33] );
65 void x264_predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t edge[33] );
66 void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[33] );
67 void x264_predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t edge[33] );
68 void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[33] );
69 void x264_predict_8x8_ddl_mmxext( uint8_t *src, uint8_t edge[33] );
70 void x264_predict_8x8_ddr_mmxext( uint8_t *src, uint8_t edge[33] );
71 void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[33] );
72 void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[33] );
73 void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
74 void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] );
75 void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[33] );
76 void x264_predict_8x8_vr_ssse3( uint16_t *src, uint16_t edge[33] );
77 void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[33] );
78 void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[33] );
79 void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[33] );
80 void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[33] );
81 void x264_predict_8x8_filter_mmxext( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
82 void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[33], int i_neighbor, int i_filters );
83 void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[33], int i_neighbor, int i_filters );
/* Prototypes for the 4x4 intra prediction routines.
 * Only declarations appear here; x264_predict_4x4_init_mmx() later in this
 * file stores these pointers into its function table. */
84 void x264_predict_4x4_ddl_mmxext( pixel *src );
85 void x264_predict_4x4_ddl_sse2( uint16_t *src );
86 void x264_predict_4x4_ddr_mmxext( pixel *src );
87 void x264_predict_4x4_vl_mmxext( pixel *src );
88 void x264_predict_4x4_vl_sse2( uint16_t *src );
89 void x264_predict_4x4_vr_mmxext( uint8_t *src );
90 void x264_predict_4x4_vr_sse2( uint16_t *src );
91 void x264_predict_4x4_vr_ssse3( pixel *src );
92 void x264_predict_4x4_hd_mmxext( pixel *src );
93 void x264_predict_4x4_hd_sse2( uint16_t *src );
94 void x264_predict_4x4_hd_ssse3( pixel *src );
95 void x264_predict_4x4_dc_mmxext( pixel *src );
96 void x264_predict_4x4_ddr_sse2( uint16_t *src );
97 void x264_predict_4x4_ddr_ssse3( pixel *src );
98 void x264_predict_4x4_hu_mmxext( pixel *src );
/* PREDICT_16x16_DC(name): defines the static wrapper
 * x264_predict_16x16_dc_<name>( pixel *src ).
 * The loop adds the 16 pixels at src[-1 + row*FDEC_STRIDE] (rows 0..15, two
 * rows per iteration) into dc, then hands src and dc to
 * x264_predict_16x16_dc_core_<name>().
 * NOTE(review): the declaration and starting value of dc are not visible in
 * this excerpt.
 * Instantiated right after the macro for mmxext and sse2. */
100 #define PREDICT_16x16_DC(name)\
101 static void x264_predict_16x16_dc_##name( pixel *src )\
104 for( int i = 0; i < 16; i += 2 )\
106 dc += src[-1 + i * FDEC_STRIDE];\
107 dc += src[-1 + (i+1) * FDEC_STRIDE];\
109 x264_predict_16x16_dc_core_##name( src, dc );\
112 PREDICT_16x16_DC( mmxext )
113 PREDICT_16x16_DC( sse2 )
/* PREDICT_16x16_DC_LEFT(name): defines the static wrapper
 * x264_predict_16x16_dc_left_<name>( pixel *src ).
 * The loop adds the 16 pixels at src[-1 + row*FDEC_STRIDE] (rows 0..15) into
 * dc; the accumulated value is shifted right by 4 (i.e. divided by 16) before
 * being passed to x264_predict_16x16_dc_left_core_<name>().
 * NOTE(review): the declaration and starting value of dc are not visible in
 * this excerpt.
 * Instantiated right after the macro for mmxext and sse2. */
115 #define PREDICT_16x16_DC_LEFT(name)\
116 static void x264_predict_16x16_dc_left_##name( pixel *src )\
119 for( int i = 0; i < 16; i += 2 )\
121 dc += src[-1 + i * FDEC_STRIDE];\
122 dc += src[-1 + (i+1) * FDEC_STRIDE];\
124 x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
127 PREDICT_16x16_DC_LEFT( mmxext )
128 PREDICT_16x16_DC_LEFT( sse2 )
/* PREDICT_P_SUM(j,i): adds one weighted term to each of the accumulators
 * H and V, which (together with src) must be in scope where it is expanded.
 *   H += i * (pixel at column j+i minus pixel at column j-i), both taken one
 *        FDEC_STRIDE before src;
 *   V += i * (pixel at row j+i minus pixel at row j-i), both taken at
 *        column -1. */
130 #define PREDICT_P_SUM(j,i)\
131 H += i * ( src[j+i - FDEC_STRIDE ] - src[j-i - FDEC_STRIDE ] );\
132 V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );\
/* Constant weight tables used as memory operands by the inline asm in the
 * plane-prediction functions later in this file.
 * pw_* hold eight 16-bit weights (operands of pmaddwd);
 * pb_* hold the same weights as eight 8-bit values (operands of pmaddubsw). */
134 ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
135 ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
136 ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
137 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
138 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
139 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
/* PREDICT_16x16_P(name): defines the static wrapper
 * x264_predict_16x16_p_<name>( pixel *src ) for 16x16 plane prediction.
 * From the gradient sums H and V it derives
 *   a   = 16 * (src[15*FDEC_STRIDE - 1] + src[15 - FDEC_STRIDE])
 *   b   = (5*H + 32) >> 6
 *   c   = (5*V + 32) >> 6
 *   i00 = a - 7*b - 7*c + 16
 * and passes src, i00, b and c to x264_predict_16x16_p_core_<name>().
 * NOTE(review): the local declarations and the statements that accumulate
 * H and V are not visible in this excerpt.
 * The trailing "#endif //!HIGH_BIT_DEPTH" indicates this section is compiled
 * only when HIGH_BIT_DEPTH is off; the matching #if is not visible here. */
142 #define PREDICT_16x16_P(name)\
143 static void x264_predict_16x16_p_##name( pixel *src )\
157 a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
158 b = ( 5 * H + 32 ) >> 6;\
159 c = ( 5 * V + 32 ) >> 6;\
160 i00 = a - b * 7 - c * 7 + 16;\
161 x264_predict_16x16_p_core_##name( src, i00, b, c );\
164 PREDICT_16x16_P( mmxext )
166 PREDICT_16x16_P( sse2 )
167 #endif //!HIGH_BIT_DEPTH
/* 16x16 plane prediction wrapper with the horizontal gradient H computed by
 * inline asm.
 * Two alternative signatures are listed (uint16_t/sse2 and uint8_t/ssse3),
 * followed by two alternative asm sequences.
 * NOTE(review): the preprocessor lines that choose between them, the local
 * declarations, the asm output constraints and the else keyword of the final
 * branch are not visible in this excerpt. */
171 static void x264_predict_16x16_p_sse2( uint16_t *src )
173 static void x264_predict_16x16_p_ssse3( uint8_t *src )
/* XMM variant: %1 is the pixel at src[-FDEC_STRIDE], %2/%3 are the 16-bit
 * tables pw_12345678 / pw_m87654321. The 16 bytes starting at 16+%1 are
 * multiplied by the positive weights and the 16 bytes starting at -2+%1 by
 * the negative ones (pmaddwd); the paddd/movhlps/pshuflw steps then fold the
 * partial sums together. */
180 "movdqu -2+%1, %%xmm1 \n"
181 "movdqa 16+%1, %%xmm0 \n"
182 "pmaddwd %2, %%xmm0 \n"
183 "pmaddwd %3, %%xmm1 \n"
184 "paddd %%xmm1, %%xmm0 \n"
185 "movhlps %%xmm0, %%xmm1 \n"
186 "paddd %%xmm1, %%xmm0 \n"
187 "pshuflw $14, %%xmm0, %%xmm1 \n"
188 "paddd %%xmm1, %%xmm0 \n"
191 :"m"(src[-FDEC_STRIDE]), "m"(*pw_12345678), "m"(*pw_m87654321)
/* MMX-register variant: same idea using pmaddubsw with the 8-bit tables
 * pb_12345678 / pb_m87654321 and 16-bit adds (paddw/pshufw) for the
 * reduction. */
196 "movq 8+%1, %%mm0 \n"
197 "palignr $7, -8+%1, %%mm1 \n"
198 "pmaddubsw %2, %%mm0 \n"
199 "pmaddubsw %3, %%mm1 \n"
200 "paddw %%mm1, %%mm0 \n"
201 "pshufw $14, %%mm0, %%mm1 \n"
202 "paddw %%mm1, %%mm0 \n"
203 "pshufw $1, %%mm0, %%mm1 \n"
204 "paddw %%mm1, %%mm0 \n"
208 :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321)
/* Vertical gradient, computed in C: weighted differences of column -1 pixels
 * taken symmetrically around row 7 (weight 8 for rows 15/-1 down to weight 1
 * for rows 8/6). */
211 V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )
212 + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )
213 + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )
214 + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )
215 + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )
216 + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )
217 + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )
218 + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
/* Same a/b/c/i00 derivation as in the PREDICT_16x16_P macro. */
219 a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );
220 b = ( 5 * H + 32 ) >> 6;
221 c = ( 5 * V + 32 ) >> 6;
222 i00 = a - b * 7 - c * 7 + 16;
223 /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case
224 * than to try to consider it in the asm. */
/* The C implementation x264_predict_16x16_p_c() is used only when
 * BIT_DEPTH > 8 and one of the range checks trips; otherwise the SSE2 core
 * receives the precomputed i00, b and c. */
225 if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) )
226 x264_predict_16x16_p_c( src );
228 x264_predict_16x16_p_core_sse2( src, i00, b, c );
/* PREDICT_8x8_P(name): defines the static wrapper
 * x264_predict_8x8c_p_<name>( uint8_t *src ) for 8x8 chroma plane prediction.
 * From the gradient sums H and V it derives
 *   a   = 16 * (src[7*FDEC_STRIDE - 1] + src[7 - FDEC_STRIDE])
 *   b   = (17*H + 16) >> 5
 *   c   = (17*V + 16) >> 5
 *   i00 = a - 3*b - 3*c + 16
 * and passes src, i00, b and c to x264_predict_8x8c_p_core_<name>().
 * NOTE(review): the local declarations and the statements that accumulate
 * H and V are not visible in this excerpt.
 * The trailing "#endif //!HIGH_BIT_DEPTH" indicates this section is compiled
 * only when HIGH_BIT_DEPTH is off; the matching #if is not visible here. */
234 #define PREDICT_8x8_P(name)\
235 static void x264_predict_8x8c_p_##name( uint8_t *src )\
245 a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
246 b = ( 17 * H + 16 ) >> 5;\
247 c = ( 17 * V + 16 ) >> 5;\
248 i00 = a -3*b -3*c + 16;\
249 x264_predict_8x8c_p_core_##name( src, i00, b, c );\
252 PREDICT_8x8_P( mmxext )
254 PREDICT_8x8_P( sse2 )
256 #endif //!HIGH_BIT_DEPTH
/* 8x8 chroma plane prediction wrapper with the horizontal gradient H
 * computed by inline asm.
 * Two alternative signatures are listed (uint16_t/sse2 and uint8_t/ssse3),
 * followed by two alternative asm sequences.
 * NOTE(review): the preprocessor lines that choose between them, the local
 * declarations, part of the asm (including the load in the MMX variant and
 * the output constraints) and the else keyword of the final branch are not
 * visible in this excerpt. */
260 static void x264_predict_8x8c_p_sse2( uint16_t *src )
262 static void x264_predict_8x8c_p_ssse3( uint8_t *src )
/* XMM variant: loads 16 bytes at src[-FDEC_STRIDE] (%1), multiplies them by
 * the 16-bit weights in pw_m32101234 (%2) with pmaddwd, then folds the
 * partial sums together. */
269 "movdqa %1, %%xmm0 \n"
270 "pmaddwd %2, %%xmm0 \n"
271 "movhlps %%xmm0, %%xmm1 \n"
272 "paddd %%xmm1, %%xmm0 \n"
273 "pshuflw $14, %%xmm0, %%xmm1 \n"
274 "paddd %%xmm1, %%xmm0 \n"
277 :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)
/* MMX-register variant: pmaddubsw against the 8-bit weights in pb_m32101234,
 * reduced with 16-bit adds. */
282 "pmaddubsw %2, %%mm0 \n"
283 "pshufw $14, %%mm0, %%mm1 \n"
284 "paddw %%mm1, %%mm0 \n"
285 "pshufw $1, %%mm0, %%mm1 \n"
286 "paddw %%mm1, %%mm0 \n"
290 :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
/* Vertical gradient, computed in C: weighted differences of column -1 pixels
 * taken symmetrically around row 3 (weights 1..4). */
293 V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )
294 + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )
295 + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )
296 + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );
/* The weight tables used by the asm span -3..4 only; the remaining -4 weight,
 * for the pixel at src[-FDEC_STRIDE - 1], is applied here. */
297 H += -4 * src[-1*FDEC_STRIDE -1];
/* Same a/b/c/i00 derivation as in the PREDICT_8x8_P macro. */
298 a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );
299 b = ( 17 * H + 16 ) >> 5;
300 c = ( 17 * V + 16 ) >> 5;
301 i00 = a -3*b -3*c + 16;
302 /* b*7 + c*7 can overflow: it's easier to just branch away in this rare case
303 * than to try to consider it in the asm. */
/* The C implementation x264_predict_8x8c_p_c() is used only when
 * BIT_DEPTH > 8 and one of the range checks trips; otherwise the SSE2 core
 * receives the precomputed i00, b and c. */
304 if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 2340 || abs(c) > 2340) )
305 x264_predict_8x8c_p_c( src );
307 x264_predict_8x8c_p_core_sse2( src, i00, b, c );
/* 8x8 chroma DC prediction from the left neighbours only, written in C.
 * s0 sums the column -1 pixels of rows 0..3 and s1 those of rows 4..7.
 * Each sum is turned into a rounded average ((s + 2) >> 2) and multiplied by
 * 0x0101010101010101 so that the average is replicated into all eight bytes
 * of dc0 / dc1.
 * NOTE(review): the bodies of the two trailing loops are not visible in this
 * excerpt; presumably they store dc0 to rows 0..3 and dc1 to rows 4..7 -
 * confirm. */
312 static void x264_predict_8x8c_dc_left( uint8_t *src )
315 uint32_t s0 = 0, s1 = 0;
318 for( y = 0; y < 4; y++ )
320 s0 += src[y * FDEC_STRIDE - 1];
321 s1 += src[(y+4) * FDEC_STRIDE - 1];
323 dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL;
324 dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL;
326 for( y = 0; y < 4; y++ )
331 for( y = 0; y < 4; y++ )
341 UNUSED int l##y = edge[14-y];
343 UNUSED int t##x = edge[16+x];
/* The two lines above declare a local l<y> read from edge[14-y] and a local
 * t<x> read from edge[16+x]; PL(n) and PT(n), used below, are presumably the
 * macros they belong to (their #define lines are not visible in this
 * excerpt).
 * PREDICT_8x8_LOAD_LEFT expands PL for n = 0..7 and PREDICT_8x8_LOAD_TOP
 * expands PT for n = 0..7. */
344 #define PREDICT_8x8_LOAD_LEFT \
345 PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7)
346 #define PREDICT_8x8_LOAD_TOP \
347 PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7)
/* SUMSUB(a,b,c,d,e,f,g,h): eight-argument helper macro applied three times
 * each to the left (l0..l7) and top (t0..t7) edge samples in INTRA_SA8D_X3.
 * NOTE(review): its replacement text is not visible in this excerpt;
 * presumably a sum/difference step on the argument pairs - confirm. */
349 #define SUMSUB(a,b,c,d,e,f,g,h)\
/* INTRA_SA8D_X3(cpu): defines
 * x264_intra_sa8d_x3_8x8_<cpu>( uint8_t *fenc, uint8_t edge[33], int res[3] ).
 * The wrapper loads the top and left edge samples into locals t0..t7 and
 * l0..l7, runs three SUMSUB passes over each set with different argument
 * pairings, and then calls x264_intra_sa8d_x3_8x8_core_<cpu>() with fenc, the
 * 2x8 int16_t scratch array sa8d_1d and res.
 * NOTE(review): the statements that fill sa8d_1d and any code after the core
 * call are not visible in this excerpt.
 * Instantiated for mmxext; the trailing "#endif // !HIGH_BIT_DEPTH" indicates
 * this section is compiled only when HIGH_BIT_DEPTH is off. */
355 #define INTRA_SA8D_X3(cpu)\
356 void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
358 PREDICT_8x8_LOAD_TOP\
359 PREDICT_8x8_LOAD_LEFT\
361 ALIGNED_16( int16_t sa8d_1d[2][8] );\
362 SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
363 SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
364 SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
373 SUMSUB(t0,t4,t1,t5,t2,t6,t3,t7);\
374 SUMSUB(t0,t2,t1,t3,t4,t6,t5,t7);\
375 SUMSUB(t0,t1,t2,t3,t4,t5,t6,t7);\
384 x264_intra_sa8d_x3_8x8_core_##cpu( fenc, sa8d_1d, res );\
391 INTRA_SA8D_X3(mmxext)
393 #endif // !HIGH_BIT_DEPTH
395 /****************************************************************************
396 * Exported functions:
397 ****************************************************************************/
/* Installs the x86 16x16 intra predictors into pf[], indexed by the
 * I_PRED_16x16_* constants, according to the capability bits in cpu.
 * Assignments are ordered from the oldest instruction set to the newest, so
 * a later assignment to the same slot replaces an earlier one.
 * NOTE(review): the statements controlled by the negated tests
 * "if( !(cpu&...) )" are not visible in this excerpt (presumably early
 * returns - confirm), nor are the preprocessor lines that separate the two
 * SSE2 sections below; the closing #endif names HIGH_BIT_DEPTH. */
398 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
400 if( !(cpu&X264_CPU_MMX) )
402 pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
403 if( cpu&X264_CPU_MMXEXT )
405 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmxext;
406 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmxext;
407 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmxext;
408 pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmxext;
/* First SSE2 section: installs SSE2 versions for DC, DC_TOP, DC_LEFT, V, H
 * and P. */
411 if( !(cpu&X264_CPU_SSE2) )
413 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_sse2;
414 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2;
415 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
416 pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse2;
417 pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2;
418 pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
/* Second section: starts with the MMXEXT plane predictor, then upgrades
 * individual slots as SSE2 and SSSE3 tests pass. */
421 pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmxext;
423 if( !(cpu&X264_CPU_SSE2) )
425 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_sse2;
426 pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse2;
427 if( cpu&X264_CPU_SSE2_IS_SLOW )
429 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2;
430 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
431 pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
432 if( !(cpu&X264_CPU_SSSE3) )
434 pf[I_PRED_16x16_H] = x264_predict_16x16_h_ssse3;
436 pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
438 #endif // HIGH_BIT_DEPTH
/* Installs the x86 8x8 chroma intra predictors into pf[], indexed by the
 * I_PRED_CHROMA_* constants, according to the capability bits in cpu.
 * Assignments are ordered from the oldest instruction set to the newest, so
 * a later assignment to the same slot replaces an earlier one.
 * NOTE(review): the statements controlled by the negated tests
 * "if( !(cpu&...) )" are not visible in this excerpt (presumably early
 * returns - confirm), nor are the preprocessor lines that separate the two
 * sections below; the closing #endif names HIGH_BIT_DEPTH. */
441 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
443 if( !(cpu&X264_CPU_MMX) )
/* First section: MMX V, MMXEXT DC, then SSE2 versions of V, DC, DC_TOP, H
 * and P. */
446 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx;
447 if( !(cpu&X264_CPU_MMXEXT) )
449 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmxext;
450 if( !(cpu&X264_CPU_SSE2) )
452 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_sse2;
453 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2;
454 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2;
455 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2;
456 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
/* Second section: DC_LEFT uses the C helper defined in this file; the other
 * slots are filled from MMX/MMXEXT and upgraded for SSE2 and SSSE3. */
459 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
461 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx;
462 if( !(cpu&X264_CPU_MMXEXT) )
464 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_mmxext;
465 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_mmxext;
467 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_mmxext;
469 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmxext;
470 if( !(cpu&X264_CPU_SSE2) )
472 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
473 if( !(cpu&X264_CPU_SSSE3) )
475 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_ssse3;
477 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
479 #endif // HIGH_BIT_DEPTH
/* Installs the x86 8x8 intra predictors into pf[], indexed by the
 * I_PRED_8x8_* constants, and stores the matching edge filter through
 * predict_8x8_filter, according to the capability bits in cpu.
 * Assignments are ordered from the oldest instruction set to the newest, so
 * a later assignment to the same slot replaces an earlier one.
 * NOTE(review): the statements controlled by the negated tests
 * "if( !(cpu&...) )" are not visible in this excerpt (presumably early
 * returns - confirm), nor are the preprocessor lines that separate the two
 * sections below; the closing #endif names HIGH_BIT_DEPTH. */
482 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
484 if( !(cpu&X264_CPU_MMXEXT) )
/* First section: SSE2 predictors and filter, then SSSE3 replacements for HD,
 * HU, VR and the filter. */
487 if( !(cpu&X264_CPU_SSE2) )
489 pf[I_PRED_8x8_V] = x264_predict_8x8_v_sse2;
490 pf[I_PRED_8x8_H] = x264_predict_8x8_h_sse2;
491 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_sse2;
492 pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
493 pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
494 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_sse2;
495 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_sse2;
496 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_sse2;
497 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_sse2;
498 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_sse2;
499 *predict_8x8_filter = x264_predict_8x8_filter_sse2;
500 if( !(cpu&X264_CPU_SSSE3) )
502 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
503 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
504 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_ssse3;
505 *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
/* Second section: MMXEXT predictors and filter, then SSE2 and SSSE3
 * replacements for selected slots. */
507 pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
508 pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
509 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
510 pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmxext;
511 pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmxext;
512 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_mmxext;
513 *predict_8x8_filter = x264_predict_8x8_filter_mmxext;
515 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_mmxext;
516 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_mmxext;
517 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_mmxext;
518 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_mmxext;
520 if( !(cpu&X264_CPU_SSE2) )
522 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_sse2;
523 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_sse2;
524 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_sse2;
525 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_sse2;
526 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_sse2;
527 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_sse2;
528 if( !(cpu&X264_CPU_SSSE3) )
530 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
531 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
532 *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
533 #endif // HIGH_BIT_DEPTH
/* Installs the x86 4x4 intra predictors into pf[], indexed by the
 * I_PRED_4x4_* constants, according to the capability bits in cpu.
 * Assignments are ordered from the oldest instruction set to the newest, so
 * a later assignment to the same slot replaces an earlier one.
 * NOTE(review): the statements controlled by the negated tests
 * "if( !(cpu&...) )" are not visible in this excerpt (presumably early
 * returns - confirm), nor are the preprocessor lines opening the section
 * closed by "#endif // HIGH_BIT_DEPTH"; the function may also continue past
 * the last visible line. */
536 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
538 if( !(cpu&X264_CPU_MMXEXT) )
540 pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_mmxext;
541 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
542 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmxext;
543 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
544 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_mmxext;
545 pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_mmxext;
547 if( !(cpu&X264_CPU_SSE2) )
549 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
550 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
551 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_sse2;
552 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
553 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_sse2;
555 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
556 #endif // HIGH_BIT_DEPTH
/* SSSE3 replacements for DDR, VR and HD sit after the HIGH_BIT_DEPTH #endif. */
557 if( !(cpu&X264_CPU_SSSE3) )
559 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
560 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
561 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;