1 /*****************************************************************************
2 * predict-c.c: intra prediction
3 *****************************************************************************
4 * Copyright (C) 2003-2013 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
24 * This program is also available under a commercial proprietary license.
25 * For more information, contact us at licensing@x264.com.
26 *****************************************************************************/
28 #include "common/common.h"
/* 16x16 DC prediction wrappers: sum the 16 left-neighbour pixels in C, then
 * hand the accumulated DC term to the per-ISA asm core routine, which adds
 * the top row and fills the block.
 * NOTE(review): this listing is truncated -- the embedded original line
 * numbers jump (33->36, 39->41, 41->44), so the function braces, the loop
 * braces and the `dc` initialisation (presumably the rounding bias -- confirm
 * against upstream x264) are missing here.  Restore the full macro body from
 * upstream before compiling. */
32 #define PREDICT_16x16_DC(name)\
33 void x264_predict_16x16_dc_##name( pixel *src )\
36 for( int i = 0; i < 16; i += 2 )\
38 dc += src[-1 + i * FDEC_STRIDE];\
39 dc += src[-1 + (i+1) * FDEC_STRIDE];\
41 x264_predict_16x16_dc_core_##name( src, dc );\
44 PREDICT_16x16_DC( mmx2 )
45 PREDICT_16x16_DC( sse2 )
46 PREDICT_16x16_DC( avx2 )
/* 16x16 DC-left prediction wrappers: same left-column sum as above, but the
 * core receives the already-averaged value (`dc>>4`, line 57) since only the
 * left neighbours contribute in DC_LEFT mode.
 * NOTE(review): truncated like the macro above -- braces and the `dc`
 * initialisation (presumably the rounding bias; confirm upstream) are
 * missing from this listing. */
48 #define PREDICT_16x16_DC_LEFT(name)\
49 static void x264_predict_16x16_dc_left_##name( pixel *src )\
52 for( int i = 0; i < 16; i += 2 )\
54 dc += src[-1 + i * FDEC_STRIDE];\
55 dc += src[-1 + (i+1) * FDEC_STRIDE];\
57 x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
60 PREDICT_16x16_DC_LEFT( mmx2 )
61 PREDICT_16x16_DC_LEFT( sse2 )
62 PREDICT_16x16_DC_LEFT( avx2 )
/* One weighted term of the H.264 plane-mode gradient accumulation:
 * adds weight `i` times the difference of the pixels mirrored around
 * index `j` -- H from the row above the block, V from the column to its
 * left.  Expects `H` and `V` to be declared in the expansion context. */
64 #define PREDICT_P_SUM(j,i)\
65 H += i * ( src[j+i - FDEC_STRIDE ] - src[j-i - FDEC_STRIDE ] );\
66 V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );
/* Constant weight vectors used by the inline-asm plane-gradient reductions
 * below: pw_* are 16-bit words for the pmaddwd paths (high bit depth),
 * pb_* are bytes for the pmaddubsw paths (8-bit depth).  Alignment matches
 * the load width used in the asm. */
68 ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
69 ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
70 ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
71 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
72 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
73 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
/* PREDICT_16x16_P_CORE: C computation of the plane-mode H/V gradients.
 * NOTE(review): the macro body (original lines 76-85, presumably the H/V
 * declarations plus eight PREDICT_P_SUM(7,i) invocations -- confirm
 * upstream) is entirely missing from this listing.
 *
 * PREDICT_16x16_P_END: derives the plane parameters (a = 16x the corner
 * reconstruction, b/c = scaled gradients, i00 = top-left seed) and calls
 * the asm core.  For >8-bit depth it falls back to the C predictor when
 * i00/b/c could overflow the 16-bit asm arithmetic (lines 94-95).
 * NOTE(review): the `else` line (original 96) between the C fallback and
 * the core call is missing here -- as listed, both would execute. */
75 #define PREDICT_16x16_P_CORE\
87 #define PREDICT_16x16_P_END(name)\
88 int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
89 int b = ( 5 * H + 32 ) >> 6;\
90 int c = ( 5 * V + 32 ) >> 6;\
91 int i00 = a - b * 7 - c * 7 + 16;\
92 /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case
93 * than to try to consider it in the asm. */\
94 if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) )\
95 x264_predict_16x16_p_c( src );\
97 x264_predict_16x16_p_core_##name( src, i00, b, c );
/* Glue macro: defines a 16x16 plane predictor that computes the gradients
 * in C (CORE) and dispatches to the `name2` asm core (END).
 * NOTE(review): the function braces (original lines 101 and 104) are
 * missing from this listing. */
99 #define PREDICT_16x16_P(name, name2)\
100 static void x264_predict_16x16_p_##name( pixel *src )\
102 PREDICT_16x16_P_CORE\
103 PREDICT_16x16_P_END(name2)\
/* Inline-asm horizontal-gradient (H) reduction for 16x16 plane mode.
 * High-bit-depth variant: SSE2 pmaddwd against the pw_* word weights;
 * 8-bit variant: MMX/SSSE3 pmaddubsw against the pb_* byte weights, with
 * pshufw-based horizontal adds.
 * NOTE(review): heavily truncated -- the `#if HIGH_BIT_DEPTH` selector
 * (original line 107), the `asm(` openings, the initial load instructions,
 * the output-operand lines (presumably `:"=r"(H)` -- confirm upstream) and
 * the closing `);` lines are all missing from this listing. */
108 #define PREDICT_16x16_P_ASM\
110 "movdqu %1, %%xmm1 \n"\
111 "movdqa %2, %%xmm0 \n"\
112 "pmaddwd %3, %%xmm0 \n"\
113 "pmaddwd %4, %%xmm1 \n"\
114 "paddd %%xmm1, %%xmm0 \n"\
115 "movhlps %%xmm0, %%xmm1 \n"\
116 "paddd %%xmm1, %%xmm0 \n"\
117 "pshuflw $14, %%xmm0, %%xmm1 \n"\
118 "paddd %%xmm1, %%xmm0 \n"\
119 "movd %%xmm0, %0 \n"\
121 :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
122 "m"(*pw_12345678), "m"(*pw_m87654321)\
124 #else // !HIGH_BIT_DEPTH
125 #define PREDICT_16x16_P_ASM\
129 "palignr $7, %3, %%mm1 \n"\
130 "pmaddubsw %4, %%mm0 \n"\
131 "pmaddubsw %5, %%mm1 \n"\
132 "paddw %%mm1, %%mm0 \n"\
133 "pshufw $14, %%mm0, %%mm1 \n"\
134 "paddw %%mm1, %%mm0 \n"\
135 "pshufw $1, %%mm0, %%mm1 \n"\
136 "paddw %%mm1, %%mm0 \n"\
140 :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
141 "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
143 #endif // HIGH_BIT_DEPTH
/* Gradient computation using the inline asm for H and plain C for V
 * (unrolled weighted differences down the left column, mirrored around
 * row 7).
 * NOTE(review): original lines 146-147 are missing -- presumably the
 * `int H, V;` declaration and the PREDICT_16x16_P_ASM invocation that
 * produces H; confirm against upstream. */
145 #define PREDICT_16x16_P_CORE_INLINE\
148 V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )\
149 + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )\
150 + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )\
151 + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )\
152 + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )\
153 + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )\
154 + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )\
155 + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
/* Like PREDICT_16x16_P but uses the inline-asm gradient core; falls back
 * to the pure-C macro when x86 inline asm is unavailable so every
 * instantiation site compiles on all toolchains.
 * NOTE(review): the function braces (original lines 159 and 162) are
 * missing from this listing. */
157 #define PREDICT_16x16_P_INLINE(name, name2)\
158 static void x264_predict_16x16_p_##name( pixel *src )\
160 PREDICT_16x16_P_CORE_INLINE\
161 PREDICT_16x16_P_END(name2)\
163 #else // !HAVE_X86_INLINE_ASM
164 #define PREDICT_16x16_P_INLINE(name, name2) PREDICT_16x16_P(name, name2)
165 #endif // HAVE_X86_INLINE_ASM
/* Instantiate the 16x16 plane predictors per bit depth and ISA.
 * NOTE(review): the opening `#if HIGH_BIT_DEPTH` (original line 167) and
 * the `#if !ARCH_X86_64` (original line 170) guarding the mmx2 variant are
 * missing from this listing -- only their matching #else/#endif survive. */
168 PREDICT_16x16_P_INLINE( sse2, sse2 )
169 #else // !HIGH_BIT_DEPTH
171 PREDICT_16x16_P( mmx2, mmx2 )
172 #endif // !ARCH_X86_64
173 PREDICT_16x16_P( sse2, sse2 )
174 #if HAVE_X86_INLINE_ASM
175 PREDICT_16x16_P_INLINE( ssse3, sse2 )
176 #endif // HAVE_X86_INLINE_ASM
177 PREDICT_16x16_P_INLINE( avx, avx )
178 #endif // HIGH_BIT_DEPTH
179 PREDICT_16x16_P_INLINE( avx2, avx2 )
/* 8x16 chroma plane-mode gradients and parameter derivation.  CORE sums
 * 4 weighted differences across the top row and 8 down the left column.
 * The high-bit-depth END passes (a,b,c) so the core keeps full precision;
 * the 8-bit END precomputes the top-left seed i00 for the asm.
 * NOTE(review): the `int H = 0, V = 0;` style declaration (original line
 * 182 -- confirm upstream), the braces, and the `#if HIGH_BIT_DEPTH`
 * selecting between the two END variants (original line 188) are missing
 * from this listing. */
181 #define PREDICT_8x16C_P_CORE\
183 for( int i = 0; i < 4; i++ )\
184 H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] );\
185 for( int i = 0; i < 8; i++ )\
186 V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] );
189 #define PREDICT_8x16C_P_END(name)\
190 int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
191 int b = ( 17 * H + 16 ) >> 5;\
192 int c = ( 5 * V + 32 ) >> 6;\
193 x264_predict_8x16c_p_core_##name( src, a, b, c );
194 #else // !HIGH_BIT_DEPTH
195 #define PREDICT_8x16C_P_END(name)\
196 int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
197 int b = ( 17 * H + 16 ) >> 5;\
198 int c = ( 5 * V + 32 ) >> 6;\
199 int i00 = a -3*b -7*c + 16;\
200 x264_predict_8x16c_p_core_##name( src, i00, b, c );
201 #endif // HIGH_BIT_DEPTH
/* Glue macro + per-ISA instantiations for the 8x16 chroma plane predictor.
 * The mmx2 variant is only built for 32-bit, 8-bit builds.
 * NOTE(review): the function braces (original lines 205 and 208) are
 * missing from this listing. */
203 #define PREDICT_8x16C_P(name)\
204 static void x264_predict_8x16c_p_##name( pixel *src )\
206 PREDICT_8x16C_P_CORE\
207 PREDICT_8x16C_P_END(name)\
210 #if !ARCH_X86_64 && !HIGH_BIT_DEPTH
211 PREDICT_8x16C_P( mmx2 )
212 #endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH
213 PREDICT_8x16C_P( sse2 )
214 PREDICT_8x16C_P( avx )
215 PREDICT_8x16C_P( avx2 )
/* 8x8 chroma plane-mode parameter derivation, mirroring the 8x16C pair
 * above but with symmetric H/V scaling (both `(17*x+16)>>5`) and a
 * -3b-3c seed in the 8-bit path.
 * NOTE(review): the PREDICT_8x8C_P_CORE body (original lines 218-225,
 * presumably the H/V accumulation loops -- confirm upstream) is entirely
 * missing, as is the `#if HIGH_BIT_DEPTH` selecting between the two END
 * variants. */
217 #define PREDICT_8x8C_P_CORE\
226 #define PREDICT_8x8C_P_END(name)\
227 int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
228 int b = ( 17 * H + 16 ) >> 5;\
229 int c = ( 17 * V + 16 ) >> 5;\
230 x264_predict_8x8c_p_core_##name( src, a, b, c );
231 #else // !HIGH_BIT_DEPTH
232 #define PREDICT_8x8C_P_END(name)\
233 int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
234 int b = ( 17 * H + 16 ) >> 5;\
235 int c = ( 17 * V + 16 ) >> 5;\
236 int i00 = a -3*b -3*c + 16;\
237 x264_predict_8x8c_p_core_##name( src, i00, b, c );
238 #endif // HIGH_BIT_DEPTH
/* Glue macro for the 8x8 chroma plane predictor (C gradients + asm core).
 * NOTE(review): the braces and the PREDICT_8x8C_P_CORE invocation
 * (original lines 242-243, 245) are missing from this listing. */
240 #define PREDICT_8x8C_P(name, name2)\
241 static void x264_predict_8x8c_p_##name( pixel *src )\
244 PREDICT_8x8C_P_END(name2)\
/* Inline-asm H-gradient reduction for 8x8 chroma plane mode: one pmaddwd
 * (high bit depth) or pmaddubsw (8-bit) against the *_m32101234 weights,
 * followed by horizontal adds.
 * NOTE(review): truncated -- the `#if HIGH_BIT_DEPTH` selector (original
 * line 248), the `asm(` openings, the first load instruction of the 8-bit
 * path, the output-operand lines and the closing `);` lines are missing
 * from this listing. */
249 #define PREDICT_8x8C_P_ASM\
251 "movdqa %1, %%xmm0 \n"\
252 "pmaddwd %2, %%xmm0 \n"\
253 "movhlps %%xmm0, %%xmm1 \n"\
254 "paddd %%xmm1, %%xmm0 \n"\
255 "pshuflw $14, %%xmm0, %%xmm1 \n"\
256 "paddd %%xmm1, %%xmm0 \n"\
257 "movd %%xmm0, %0 \n"\
259 :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
261 #else // !HIGH_BIT_DEPTH
262 #define PREDICT_8x8C_P_ASM\
265 "pmaddubsw %2, %%mm0 \n"\
266 "pshufw $14, %%mm0, %%mm1 \n"\
267 "paddw %%mm1, %%mm0 \n"\
268 "pshufw $1, %%mm0, %%mm1 \n"\
269 "paddw %%mm1, %%mm0 \n"\
273 :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
275 #endif // HIGH_BIT_DEPTH
/* 8x8C gradients with asm H and C-coded V; the trailing `H += -4 * ...`
 * subtracts the top-left pixel's contribution that the weighted asm sum
 * over src[-FDEC_STRIDE..] included with weight -4 (weight vector starts
 * at -3 on the in-block columns).
 * NOTE(review): original lines 278-279 are missing -- presumably the
 * `int H, V;` declaration and the PREDICT_8x8C_P_ASM invocation; confirm
 * against upstream. */
277 #define PREDICT_8x8C_P_CORE_INLINE\
280 V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )\
281 + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )\
282 + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )\
283 + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );\
284 H += -4 * src[-1*FDEC_STRIDE -1];
/* Inline-asm flavour of the 8x8C plane glue; degrades to the pure-C macro
 * when x86 inline asm is unavailable.
 * NOTE(review): the function braces (original lines 288 and 291) are
 * missing from this listing. */
286 #define PREDICT_8x8C_P_INLINE(name, name2)\
287 static void x264_predict_8x8c_p_##name( pixel *src )\
289 PREDICT_8x8C_P_CORE_INLINE\
290 PREDICT_8x8C_P_END(name2)\
292 #else // !HAVE_X86_INLINE_ASM
293 #define PREDICT_8x8C_P_INLINE(name, name2) PREDICT_8x8C_P(name, name2)
294 #endif // HAVE_X86_INLINE_ASM
/* Instantiate the 8x8 chroma plane predictors per bit depth and ISA.
 * NOTE(review): the opening `#if HIGH_BIT_DEPTH` (original line ~296) and
 * the `#if !ARCH_X86_64` guard before the mmx2 variant (original line
 * ~299) are missing from this listing -- only their #else/#endif
 * counterparts survive. */
297 PREDICT_8x8C_P_INLINE( sse2, sse2 )
298 #else //!HIGH_BIT_DEPTH
300 PREDICT_8x8C_P( mmx2, mmx2 )
301 #endif // !ARCH_X86_64
302 PREDICT_8x8C_P( sse2, sse2 )
303 #if HAVE_X86_INLINE_ASM
304 PREDICT_8x8C_P_INLINE( ssse3, sse2 )
305 #endif // HAVE_X86_INLINE_ASM
306 #endif // HIGH_BIT_DEPTH
307 PREDICT_8x8C_P_INLINE( avx, avx )
308 PREDICT_8x8C_P_INLINE( avx2, avx2 )
310 #if ARCH_X86_64 && !HIGH_BIT_DEPTH
/* DC-left prediction for 8x8 chroma (x86-64, 8-bit only): averages the top
 * four and bottom four left-neighbour pixels separately, splats each
 * average across a 64-bit word via the 0x0101...01 multiply, and writes
 * the two halves of the block row by row.
 * NOTE(review): truncated -- the braces, the `uint64_t dc0, dc1;` /
 * `int y;` declarations and the bodies of the two store loops (original
 * lines 326-334, presumably `M64( src ) = dc0/dc1; src += FDEC_STRIDE;`
 * -- confirm upstream) are missing from this listing. */
311 static void x264_predict_8x8c_dc_left( uint8_t *src )
314 uint32_t s0 = 0, s1 = 0;
317 for( y = 0; y < 4; y++ )
319 s0 += src[y * FDEC_STRIDE - 1];
320 s1 += src[(y+4) * FDEC_STRIDE - 1];
    /* (s + 2) >> 2 rounds the 4-pixel sum; the multiply broadcasts the
     * byte into all 8 lanes of the 64-bit store value. */
322 dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL;
323 dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL;
325 for( y = 0; y < 4; y++ )
330 for( y = 0; y < 4; y++ )
336 #endif // ARCH_X86_64 && !HIGH_BIT_DEPTH
338 /****************************************************************************
339 * Exported functions:
340 ****************************************************************************/
/* Populate the 16x16 intra-prediction function table with the best asm
 * implementation each CPU flag allows, from weakest ISA to strongest so
 * later assignments override earlier ones.
 * NOTE(review): truncated -- upstream guards each capability check with an
 * early `return;` (e.g. `if( !(cpu&X264_CPU_MMX2) ) return;`); those
 * `return;` lines, the braces, and the `#if HIGH_BIT_DEPTH` that splits
 * the two dispatch halves are missing here.  As listed, the negated
 * conditions would wrongly *skip* the following assignment -- evidence of
 * the dropped lines, not the intended logic. */
341 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
343 if( !(cpu&X264_CPU_MMX2) )
345 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmx2;
346 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmx2;
347 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
348 pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx2;
349 pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmx2;
351 if( !(cpu&X264_CPU_SSE) )
353 pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse;
354 if( !(cpu&X264_CPU_SSE2) )
356 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_sse2;
357 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2;
358 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
359 pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2;
360 pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
361 if( !(cpu&X264_CPU_AVX) )
363 pf[I_PRED_16x16_V] = x264_predict_16x16_v_avx;
364 if( !(cpu&X264_CPU_AVX2) )
366 pf[I_PRED_16x16_H] = x264_predict_16x16_h_avx2;
/* 8-bit-depth branch below (its `#else // !HIGH_BIT_DEPTH` line is missing
 * from this listing). */
369 pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmx2;
371 if( !(cpu&X264_CPU_SSE) )
373 pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse;
374 if( !(cpu&X264_CPU_SSE2) )
376 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_sse2;
377 if( cpu&X264_CPU_SSE2_IS_SLOW )
379 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2;
380 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
381 pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
382 if( !(cpu&X264_CPU_SSSE3) )
384 if( !(cpu&X264_CPU_SLOW_PSHUFB) )
385 pf[I_PRED_16x16_H] = x264_predict_16x16_h_ssse3;
386 #if HAVE_X86_INLINE_ASM
387 pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
389 if( !(cpu&X264_CPU_AVX) )
391 pf[I_PRED_16x16_P] = x264_predict_16x16_p_avx;
392 #endif // HIGH_BIT_DEPTH
394 if( cpu&X264_CPU_AVX2 )
396 pf[I_PRED_16x16_P] = x264_predict_16x16_p_avx2;
397 pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_avx2;
398 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_avx2;
399 pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2;
/* Populate the 8x8 chroma intra-prediction table, weakest ISA first.
 * NOTE(review): truncated like the 16x16 initializer -- the early
 * `return;` lines after the negated capability checks, braces, and the
 * `#if HIGH_BIT_DEPTH` split are missing from this listing; the negated
 * `if( !(cpu&...) )` lines are guards, not conditions on the following
 * assignment. */
403 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
405 if( !(cpu&X264_CPU_MMX) )
408 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx;
409 if( !(cpu&X264_CPU_MMX2) )
411 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmx2;
412 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_mmx2;
413 if( !(cpu&X264_CPU_SSE) )
415 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_sse;
416 if( !(cpu&X264_CPU_SSE2) )
418 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2;
419 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2;
420 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2;
421 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
422 if( !(cpu&X264_CPU_AVX) )
424 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx;
425 if( !(cpu&X264_CPU_AVX2) )
427 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_avx2;
/* 8-bit-depth branch below; x86-64 gets the C dc_left helper defined
 * above (line 430). */
430 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
432 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx;
433 if( !(cpu&X264_CPU_MMX2) )
435 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_mmx2;
436 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_mmx2;
438 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_mmx2;
440 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmx2;
441 if( !(cpu&X264_CPU_SSE2) )
443 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
444 if( !(cpu&X264_CPU_SSSE3) )
446 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_ssse3;
447 #if HAVE_X86_INLINE_ASM
448 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
450 if( !(cpu&X264_CPU_AVX) )
452 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx;
453 #endif // HIGH_BIT_DEPTH
455 if( cpu&X264_CPU_AVX2 )
457 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx2;
/* Populate the 8x16 chroma (4:2:2) intra-prediction table, weakest ISA
 * first so stronger ISAs override.
 * NOTE(review): truncated -- the early `return;` guard lines, braces and
 * the `#if HIGH_BIT_DEPTH` split are missing from this listing (only the
 * closing `#endif // HIGH_BIT_DEPTH` at original line 504 survives). */
461 void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] )
463 if( !(cpu&X264_CPU_MMX) )
466 if( !(cpu&X264_CPU_MMX2) )
468 pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_mmx2;
469 pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_mmx2;
470 if( !(cpu&X264_CPU_SSE) )
472 pf[I_PRED_CHROMA_V] = x264_predict_8x16c_v_sse;
473 if( !(cpu&X264_CPU_SSE2) )
475 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_sse2;
476 pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_sse2;
477 pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_sse2;
478 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_sse2;
479 if( !(cpu&X264_CPU_AVX) )
481 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx;
482 if( !(cpu&X264_CPU_AVX2) )
484 pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_avx2;
/* 8-bit-depth branch below (its `#else // !HIGH_BIT_DEPTH` line is missing
 * from this listing). */
486 pf[I_PRED_CHROMA_V] = x264_predict_8x16c_v_mmx;
487 if( !(cpu&X264_CPU_MMX2) )
489 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_mmx2;
490 pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_mmx2;
491 pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_mmx2;
493 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_mmx2;
495 if( !(cpu&X264_CPU_SSE2) )
497 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_sse2;
498 if( !(cpu&X264_CPU_SSSE3) )
500 pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_ssse3;
501 if( !(cpu&X264_CPU_AVX) )
503 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx;
504 #endif // HIGH_BIT_DEPTH
506 if( cpu&X264_CPU_AVX2 )
508 pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx2;
/* Populate the 8x8 luma intra-prediction table and the edge-filter
 * pointer, weakest ISA first.
 * NOTE(review): truncated -- the early `return;` guard lines, braces and
 * the `#if HIGH_BIT_DEPTH` split are missing from this listing (only the
 * closing `#endif // HIGH_BIT_DEPTH` at original line 593 survives); the
 * negated `if( !(cpu&...) )` lines are early-return guards upstream. */
512 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
514 if( !(cpu&X264_CPU_MMX2) )
517 if( !(cpu&X264_CPU_SSE) )
519 pf[I_PRED_8x8_V] = x264_predict_8x8_v_sse;
520 if( !(cpu&X264_CPU_SSE2) )
522 pf[I_PRED_8x8_H] = x264_predict_8x8_h_sse2;
523 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_sse2;
524 pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
525 pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
526 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_sse2;
527 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_sse2;
528 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_sse2;
529 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_sse2;
530 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_sse2;
531 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_sse2;
532 *predict_8x8_filter = x264_predict_8x8_filter_sse2;
533 if( !(cpu&X264_CPU_SSSE3) )
535 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_ssse3;
536 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_ssse3;
537 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
538 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
539 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_ssse3;
540 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_ssse3;
541 *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
542 if( cpu&X264_CPU_CACHELINE_64 )
544 pf[I_PRED_8x8_DDL]= x264_predict_8x8_ddl_ssse3_cache64;
545 pf[I_PRED_8x8_DDR]= x264_predict_8x8_ddr_ssse3_cache64;
547 if( !(cpu&X264_CPU_AVX) )
549 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_avx;
550 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_avx;
551 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_avx;
552 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_avx;
553 *predict_8x8_filter = x264_predict_8x8_filter_avx;
/* 8-bit-depth branch below (its `#else // !HIGH_BIT_DEPTH` line is missing
 * from this listing). */
555 pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmx2;
556 pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmx2;
557 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmx2;
558 pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmx2;
559 pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmx2;
560 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_mmx2;
561 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_mmx2;
562 *predict_8x8_filter = x264_predict_8x8_filter_mmx2;
564 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_mmx2;
565 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_mmx2;
566 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_mmx2;
567 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_mmx2;
569 if( !(cpu&X264_CPU_SSE2) )
571 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_sse2;
572 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_sse2;
573 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_sse2;
574 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_sse2;
575 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_sse2;
576 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_sse2;
577 if( !(cpu&X264_CPU_SSSE3) )
579 if( !(cpu&X264_CPU_SLOW_PALIGNR) )
581 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_ssse3;
582 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_ssse3;
584 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
585 *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
586 if( !(cpu&X264_CPU_AVX) )
588 pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_avx;
589 pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_avx;
590 pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_avx;
591 pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_avx;
592 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_avx;
593 #endif // HIGH_BIT_DEPTH
/* Populate the 4x4 intra-prediction table, weakest ISA first.
 * NOTE(review): truncated -- the early `return;` guard lines, braces and
 * the `#if HIGH_BIT_DEPTH` split are missing from this listing (only the
 * closing `#endif // HIGH_BIT_DEPTH` at original line 638 survives); the
 * negated `if( !(cpu&...) )` lines are early-return guards upstream. */
596 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
598 if( !(cpu&X264_CPU_MMX2) )
600 pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_mmx2;
601 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmx2;
602 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmx2;
603 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmx2;
604 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_mmx2;
605 pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_mmx2;
607 if( !(cpu&X264_CPU_SSE2) )
609 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
610 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
611 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_sse2;
612 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
613 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_sse2;
614 if( !(cpu&X264_CPU_SSSE3) )
616 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
617 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
618 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
619 if( !(cpu&X264_CPU_AVX) )
621 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_avx;
622 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_avx;
623 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_avx;
624 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_avx;
625 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_avx;
626 if( !(cpu&X264_CPU_AVX2) )
628 pf[I_PRED_4x4_H] = x264_predict_4x4_h_avx2;
/* 8-bit-depth branch below (its `#else // !HIGH_BIT_DEPTH` line is missing
 * from this listing). */
630 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmx2;
631 if( !(cpu&X264_CPU_SSSE3) )
633 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
634 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
635 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
636 if( cpu&X264_CPU_CACHELINE_64 )
637 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3_cache64;
638 #endif // HIGH_BIT_DEPTH