]> git.sesse.net Git - x264/blob - common/x86/predict-c.c
Bump dates to 2014
[x264] / common / x86 / predict-c.c
1 /*****************************************************************************
2  * predict-c.c: intra prediction
3  *****************************************************************************
4  * Copyright (C) 2003-2014 x264 project
5  *
6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7  *          Loren Merritt <lorenm@u.washington.edu>
8  *          Fiona Glaser <fiona@x264.com>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23  *
24  * This program is also available under a commercial proprietary license.
25  * For more information, contact us at licensing@x264.com.
26  *****************************************************************************/
27
28 #include "common/common.h"
29 #include "predict.h"
30 #include "pixel.h"
31
32 #define PREDICT_16x16_DC(name)\
33 void x264_predict_16x16_dc_##name( pixel *src )\
34 {\
35     uint32_t dc = 16;\
36     for( int i = 0; i < 16; i += 2 )\
37     {\
38         dc += src[-1 + i * FDEC_STRIDE];\
39         dc += src[-1 + (i+1) * FDEC_STRIDE];\
40     }\
41     x264_predict_16x16_dc_core_##name( src, dc );\
42 }
43
44 PREDICT_16x16_DC( mmx2 )
45 PREDICT_16x16_DC( sse2 )
46 PREDICT_16x16_DC( avx2 )
47
48 #define PREDICT_16x16_DC_LEFT(name)\
49 static void x264_predict_16x16_dc_left_##name( pixel *src )\
50 {\
51     uint32_t dc = 8;\
52     for( int i = 0; i < 16; i += 2 )\
53     {\
54         dc += src[-1 + i * FDEC_STRIDE];\
55         dc += src[-1 + (i+1) * FDEC_STRIDE];\
56     }\
57     x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
58 }
59
60 PREDICT_16x16_DC_LEFT( mmx2 )
61 PREDICT_16x16_DC_LEFT( sse2 )
62 PREDICT_16x16_DC_LEFT( avx2 )
63
/* Adds one term of the plane-prediction gradients to the caller's H and V:
 * the difference between the two neighbours lying i positions either side of
 * index j, weighted by i. H reads the row above the block, V the column to
 * its left. Expands to two statements and expects `src`, `H` and `V` to be in
 * scope. Both arguments are parenthesised so that expressions (not only
 * literals) expand with the intended precedence. */
#define PREDICT_P_SUM(j,i)\
    H += (i) * ( src[(j)+(i) - FDEC_STRIDE ]  - src[(j)-(i) - FDEC_STRIDE ] );\
    V += (i) * ( src[((j)+(i))*FDEC_STRIDE -1] - src[((j)-(i))*FDEC_STRIDE -1] );
67
/* Weight tables for the inline-asm H-gradient computations in this file.
 * The 16-bit (pw_*) tables are referenced only by the high-bit-depth asm and
 * the 8-bit (pb_*) tables only by the 8-bit asm, so each set is compiled only
 * in the configuration that uses it. This avoids unused-variable warnings and
 * dead data in the other configurations. */
#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#else // !HIGH_BIT_DEPTH
ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#endif // HIGH_BIT_DEPTH
#endif // HAVE_X86_INLINE_ASM
74
/* Plain-C gradient computation for 16x16 plane prediction: declares H and V
 * and accumulates the eight weighted neighbour differences centred on
 * index 7 (distances 1 through 8). */
#define PREDICT_16x16_P_CORE\
    int H = 0;\
    int V = 0;\
    for( int dist = 1; dist <= 8; dist++ )\
    {\
        PREDICT_P_SUM(7,dist)\
    }

/* Turns H and V into the plane parameters a, b, c and the value i00 passed
 * to the asm core, then dispatches. b*15 + c*15 can overflow, so for bit
 * depths above 8 the rare out-of-range case branches to the C routine
 * x264_predict_16x16_p_c() instead of being handled in the asm. */
#define PREDICT_16x16_P_END(name)\
    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
    int b = ( 5 * H + 32 ) >> 6;\
    int c = ( 5 * V + 32 ) >> 6;\
    int i00 = a - b * 7 - c * 7 + 16;\
    if( BIT_DEPTH <= 8 || ( i00 <= 0x7fff && abs(b) <= 1092 && abs(c) <= 1092 ) )\
    {\
        x264_predict_16x16_p_core_##name( src, i00, b, c );\
    }\
    else\
    {\
        x264_predict_16x16_p_c( src );\
    }

/* Generates x264_predict_16x16_p_<name>() using the C gradient computation
 * and the asm core routine selected by name2. */
#define PREDICT_16x16_P(name, name2)\
static void x264_predict_16x16_p_##name( pixel *src )\
{\
    PREDICT_16x16_P_CORE\
    PREDICT_16x16_P_END(name2)\
}
105
106 #if HAVE_X86_INLINE_ASM
107 #if HIGH_BIT_DEPTH
108 #define PREDICT_16x16_P_ASM\
109     asm (\
110         "movdqu           %1, %%xmm1 \n"\
111         "movdqa           %2, %%xmm0 \n"\
112         "pmaddwd          %3, %%xmm0 \n"\
113         "pmaddwd          %4, %%xmm1 \n"\
114         "paddd        %%xmm1, %%xmm0 \n"\
115         "movhlps      %%xmm0, %%xmm1 \n"\
116         "paddd        %%xmm1, %%xmm0 \n"\
117         "pshuflw $14, %%xmm0, %%xmm1 \n"\
118         "paddd        %%xmm1, %%xmm0 \n"\
119         "movd         %%xmm0, %0     \n"\
120         :"=r"(H)\
121         :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
122          "m"(*pw_12345678), "m"(*pw_m87654321)\
123     );
124 #else // !HIGH_BIT_DEPTH
125 #define PREDICT_16x16_P_ASM\
126     asm (\
127         "movq           %1, %%mm1 \n"\
128         "movq           %2, %%mm0 \n"\
129         "palignr $7,    %3, %%mm1 \n"\
130         "pmaddubsw      %4, %%mm0 \n"\
131         "pmaddubsw      %5, %%mm1 \n"\
132         "paddw       %%mm1, %%mm0 \n"\
133         "pshufw $14, %%mm0, %%mm1 \n"\
134         "paddw       %%mm1, %%mm0 \n"\
135         "pshufw  $1, %%mm0, %%mm1 \n"\
136         "paddw       %%mm1, %%mm0 \n"\
137         "movd        %%mm0, %0    \n"\
138         "movswl        %w0, %0    \n"\
139         :"=r"(H)\
140         :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
141          "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
142     );
143 #endif // HIGH_BIT_DEPTH
144
145 #define PREDICT_16x16_P_CORE_INLINE\
146     int H, V;\
147     PREDICT_16x16_P_ASM\
148     V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )\
149       + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )\
150       + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )\
151       + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )\
152       + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )\
153       + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )\
154       + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )\
155       + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
156
157 #define PREDICT_16x16_P_INLINE(name, name2)\
158 static void x264_predict_16x16_p_##name( pixel *src )\
159 {\
160     PREDICT_16x16_P_CORE_INLINE\
161     PREDICT_16x16_P_END(name2)\
162 }
163 #else // !HAVE_X86_INLINE_ASM
164 #define PREDICT_16x16_P_INLINE(name, name2) PREDICT_16x16_P(name, name2)
165 #endif // HAVE_X86_INLINE_ASM
166
167 #if HIGH_BIT_DEPTH
168 PREDICT_16x16_P_INLINE( sse2, sse2 )
169 #else // !HIGH_BIT_DEPTH
170 #if !ARCH_X86_64
171 PREDICT_16x16_P( mmx2, mmx2 )
172 #endif // !ARCH_X86_64
173 PREDICT_16x16_P( sse2, sse2 )
174 #if HAVE_X86_INLINE_ASM
175 PREDICT_16x16_P_INLINE( ssse3, sse2 )
176 #endif // HAVE_X86_INLINE_ASM
177 PREDICT_16x16_P_INLINE( avx, avx )
178 #endif // HIGH_BIT_DEPTH
179 PREDICT_16x16_P_INLINE( avx2, avx2 )
180
181 #define PREDICT_8x16C_P_CORE\
182     int H = 0, V = 0;\
183     for( int i = 0; i < 4; i++ )\
184         H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] );\
185     for( int i = 0; i < 8; i++ )\
186         V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] );
187
188 #if HIGH_BIT_DEPTH
189 #define PREDICT_8x16C_P_END(name)\
190     int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
191     int b = ( 17 * H + 16 ) >> 5;\
192     int c = ( 5 * V + 32 ) >> 6;\
193     x264_predict_8x16c_p_core_##name( src, a, b, c );
194 #else // !HIGH_BIT_DEPTH
195 #define PREDICT_8x16C_P_END(name)\
196     int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
197     int b = ( 17 * H + 16 ) >> 5;\
198     int c = ( 5 * V + 32 ) >> 6;\
199     int i00 = a -3*b -7*c + 16;\
200     x264_predict_8x16c_p_core_##name( src, i00, b, c );
201 #endif // HIGH_BIT_DEPTH
202
203 #define PREDICT_8x16C_P(name)\
204 static void x264_predict_8x16c_p_##name( pixel *src )\
205 {\
206     PREDICT_8x16C_P_CORE\
207     PREDICT_8x16C_P_END(name)\
208 }
209
210 #if !ARCH_X86_64 && !HIGH_BIT_DEPTH
211 PREDICT_8x16C_P( mmx2 )
212 #endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH
213 PREDICT_8x16C_P( sse2 )
214 PREDICT_8x16C_P( avx )
215 PREDICT_8x16C_P( avx2 )
216
/* Plain-C gradient computation for 8x8 chroma plane prediction: declares H
 * and V and accumulates the four weighted neighbour differences centred on
 * index 3 (distances 1 through 4). */
#define PREDICT_8x8C_P_CORE\
    int H = 0;\
    int V = 0;\
    for( int dist = 1; dist <= 4; dist++ )\
    {\
        PREDICT_P_SUM(3,dist)\
    }

/* Plane parameters shared by both bit-depth variants of the dispatch. */
#define PREDICT_8x8C_P_PARAMS\
    int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 17 * V + 16 ) >> 5;

/* Calls the asm core. The high-bit-depth core is passed a directly, while the
 * 8-bit core is passed the precomputed i00 = a - 3*b - 3*c + 16. */
#if HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    PREDICT_8x8C_P_PARAMS\
    x264_predict_8x8c_p_core_##name( src, a, b, c );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    PREDICT_8x8C_P_PARAMS\
    int i00 = a -3*b -3*c + 16;\
    x264_predict_8x8c_p_core_##name( src, i00, b, c );
#endif // HIGH_BIT_DEPTH

/* Generates x264_predict_8x8c_p_<name>() using the C gradient computation
 * and the asm core routine selected by name2. */
#define PREDICT_8x8C_P(name, name2)\
static void x264_predict_8x8c_p_##name( pixel *src )\
{\
    PREDICT_8x8C_P_CORE\
    PREDICT_8x8C_P_END(name2)\
}
246
247 #if HAVE_X86_INLINE_ASM
248 #if HIGH_BIT_DEPTH
249 #define PREDICT_8x8C_P_ASM\
250     asm (\
251         "movdqa           %1, %%xmm0 \n"\
252         "pmaddwd          %2, %%xmm0 \n"\
253         "movhlps      %%xmm0, %%xmm1 \n"\
254         "paddd        %%xmm1, %%xmm0 \n"\
255         "pshuflw $14, %%xmm0, %%xmm1 \n"\
256         "paddd        %%xmm1, %%xmm0 \n"\
257         "movd         %%xmm0, %0     \n"\
258         :"=r"(H)\
259         :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
260     );
261 #else // !HIGH_BIT_DEPTH
262 #define PREDICT_8x8C_P_ASM\
263     asm (\
264         "movq           %1, %%mm0 \n"\
265         "pmaddubsw      %2, %%mm0 \n"\
266         "pshufw $14, %%mm0, %%mm1 \n"\
267         "paddw       %%mm1, %%mm0 \n"\
268         "pshufw  $1, %%mm0, %%mm1 \n"\
269         "paddw       %%mm1, %%mm0 \n"\
270         "movd        %%mm0, %0    \n"\
271         "movswl        %w0, %0    \n"\
272         :"=r"(H)\
273         :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
274     );
275 #endif // HIGH_BIT_DEPTH
276
277 #define PREDICT_8x8C_P_CORE_INLINE\
278     int H, V;\
279     PREDICT_8x8C_P_ASM\
280     V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )\
281       + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )\
282       + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )\
283       + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );\
284     H += -4 * src[-1*FDEC_STRIDE -1];
285
286 #define PREDICT_8x8C_P_INLINE(name, name2)\
287 static void x264_predict_8x8c_p_##name( pixel *src )\
288 {\
289     PREDICT_8x8C_P_CORE_INLINE\
290     PREDICT_8x8C_P_END(name2)\
291 }
292 #else // !HAVE_X86_INLINE_ASM
293 #define PREDICT_8x8C_P_INLINE(name, name2) PREDICT_8x8C_P(name, name2)
294 #endif // HAVE_X86_INLINE_ASM
295
296 #if HIGH_BIT_DEPTH
297 PREDICT_8x8C_P_INLINE( sse2, sse2 )
298 #else  //!HIGH_BIT_DEPTH
299 #if !ARCH_X86_64
300 PREDICT_8x8C_P( mmx2, mmx2 )
301 #endif // !ARCH_X86_64
302 PREDICT_8x8C_P( sse2, sse2 )
303 #if HAVE_X86_INLINE_ASM
304 PREDICT_8x8C_P_INLINE( ssse3, sse2 )
305 #endif // HAVE_X86_INLINE_ASM
306 #endif // HIGH_BIT_DEPTH
307 PREDICT_8x8C_P_INLINE( avx, avx )
308 PREDICT_8x8C_P_INLINE( avx2, avx2 )
309
#if ARCH_X86_64 && !HIGH_BIT_DEPTH
/* 8x8 chroma DC prediction from the left neighbours only. The upper four
 * rows are filled with the rounded mean of the four pixels left of rows 0-3,
 * the lower four rows with the rounded mean of those left of rows 4-7. Each
 * row is written as a single 64-bit store through M64(). */
static void x264_predict_8x8c_dc_left( uint8_t *src )
{
    uint32_t sum_upper = 0, sum_lower = 0;
    for( int row = 0; row < 4; row++ )
    {
        sum_upper += src[row * FDEC_STRIDE - 1];
        sum_lower += src[(row + 4) * FDEC_STRIDE - 1];
    }

    /* Rounded mean of four pixels, replicated into all eight bytes. */
    const uint64_t fill_upper = (( sum_upper + 2 ) >> 2) * 0x0101010101010101ULL;
    const uint64_t fill_lower = (( sum_lower + 2 ) >> 2) * 0x0101010101010101ULL;

    for( int row = 0; row < 8; row++, src += FDEC_STRIDE )
        M64( src ) = row < 4 ? fill_upper : fill_lower;
}
#endif // ARCH_X86_64 && !HIGH_BIT_DEPTH
337
338 /****************************************************************************
339  * Exported functions:
340  ****************************************************************************/
341 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
342 {
343     if( !(cpu&X264_CPU_MMX2) )
344         return;
345     pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_mmx2;
346     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_mmx2;
347     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
348     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx2;
349     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_mmx2;
350 #if HIGH_BIT_DEPTH
351     if( !(cpu&X264_CPU_SSE) )
352         return;
353     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
354     if( !(cpu&X264_CPU_SSE2) )
355         return;
356     pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
357     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
358     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
359     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_sse2;
360     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
361     if( !(cpu&X264_CPU_AVX) )
362         return;
363     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_avx;
364     if( !(cpu&X264_CPU_AVX2) )
365         return;
366     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
367 #else
368 #if !ARCH_X86_64
369     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmx2;
370 #endif
371     if( !(cpu&X264_CPU_SSE) )
372         return;
373     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
374     if( !(cpu&X264_CPU_SSE2) )
375         return;
376     pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
377     if( cpu&X264_CPU_SSE2_IS_SLOW )
378         return;
379     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
380     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
381     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
382     if( !(cpu&X264_CPU_SSSE3) )
383         return;
384     if( !(cpu&X264_CPU_SLOW_PSHUFB) )
385         pf[I_PRED_16x16_H]       = x264_predict_16x16_h_ssse3;
386 #if HAVE_X86_INLINE_ASM
387     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
388 #endif
389     if( !(cpu&X264_CPU_AVX) )
390         return;
391     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx;
392 #endif // HIGH_BIT_DEPTH
393
394     if( cpu&X264_CPU_AVX2 )
395     {
396         pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx2;
397         pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_avx2;
398         pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_avx2;
399         pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2;
400     }
401 }
402
403 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
404 {
405     if( !(cpu&X264_CPU_MMX) )
406         return;
407 #if HIGH_BIT_DEPTH
408     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
409     if( !(cpu&X264_CPU_MMX2) )
410         return;
411     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
412     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
413     if( !(cpu&X264_CPU_SSE) )
414         return;
415     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_sse;
416     if( !(cpu&X264_CPU_SSE2) )
417         return;
418     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_sse2;
419     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_sse2;
420     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_sse2;
421     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
422     if( !(cpu&X264_CPU_AVX) )
423         return;
424     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
425     if( !(cpu&X264_CPU_AVX2) )
426         return;
427     pf[I_PRED_CHROMA_H]   = x264_predict_8x8c_h_avx2;
428 #else
429 #if ARCH_X86_64
430     pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
431 #endif
432     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
433     if( !(cpu&X264_CPU_MMX2) )
434         return;
435     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmx2;
436     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
437 #if !ARCH_X86_64
438     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmx2;
439 #endif
440     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
441     if( !(cpu&X264_CPU_SSE2) )
442         return;
443     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
444     if( !(cpu&X264_CPU_SSSE3) )
445         return;
446     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_ssse3;
447 #if HAVE_X86_INLINE_ASM
448     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
449 #endif
450     if( !(cpu&X264_CPU_AVX) )
451         return;
452     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
453 #endif // HIGH_BIT_DEPTH
454
455     if( cpu&X264_CPU_AVX2 )
456     {
457         pf[I_PRED_CHROMA_P]   = x264_predict_8x8c_p_avx2;
458     }
459 }
460
461 void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] )
462 {
463     if( !(cpu&X264_CPU_MMX) )
464         return;
465 #if HIGH_BIT_DEPTH
466     if( !(cpu&X264_CPU_MMX2) )
467         return;
468     pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
469     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
470     if( !(cpu&X264_CPU_SSE) )
471         return;
472     pf[I_PRED_CHROMA_V]       = x264_predict_8x16c_v_sse;
473     if( !(cpu&X264_CPU_SSE2) )
474         return;
475     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_sse2;
476     pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_sse2;
477     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_sse2;
478     pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
479     if( !(cpu&X264_CPU_AVX) )
480         return;
481     pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
482     if( !(cpu&X264_CPU_AVX2) )
483         return;
484     pf[I_PRED_CHROMA_H]   = x264_predict_8x16c_h_avx2;
485 #else
486     pf[I_PRED_CHROMA_V]       = x264_predict_8x16c_v_mmx;
487     if( !(cpu&X264_CPU_MMX2) )
488         return;
489     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_mmx2;
490     pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
491     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
492 #if !ARCH_X86_64
493     pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_mmx2;
494 #endif
495     if( !(cpu&X264_CPU_SSE2) )
496         return;
497     pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
498     if( !(cpu&X264_CPU_SSSE3) )
499         return;
500     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_ssse3;
501     if( !(cpu&X264_CPU_AVX) )
502         return;
503     pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
504 #endif // HIGH_BIT_DEPTH
505
506     if( cpu&X264_CPU_AVX2 )
507     {
508         pf[I_PRED_CHROMA_P]   = x264_predict_8x16c_p_avx2;
509     }
510 }
511
512 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
513 {
514     if( !(cpu&X264_CPU_MMX2) )
515         return;
516 #if HIGH_BIT_DEPTH
517     if( !(cpu&X264_CPU_SSE) )
518         return;
519     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_sse;
520     if( !(cpu&X264_CPU_SSE2) )
521         return;
522     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_sse2;
523     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_sse2;
524     pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
525     pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
526     pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
527     pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
528     pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_sse2;
529     pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
530     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
531     pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
532     *predict_8x8_filter   = x264_predict_8x8_filter_sse2;
533     if( !(cpu&X264_CPU_SSSE3) )
534         return;
535     pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_ssse3;
536     pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_ssse3;
537     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_ssse3;
538     pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
539     pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_ssse3;
540     pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_ssse3;
541     *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
542     if( cpu&X264_CPU_CACHELINE_64 )
543     {
544         pf[I_PRED_8x8_DDL]= x264_predict_8x8_ddl_ssse3_cache64;
545         pf[I_PRED_8x8_DDR]= x264_predict_8x8_ddr_ssse3_cache64;
546     }
547     if( !(cpu&X264_CPU_AVX) )
548         return;
549     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_avx;
550     pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_avx;
551     pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_avx;
552     pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_avx;
553     *predict_8x8_filter   = x264_predict_8x8_filter_avx;
554 #else
555     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmx2;
556     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmx2;
557     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmx2;
558     pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmx2;
559     pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmx2;
560     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_mmx2;
561     pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_mmx2;
562     *predict_8x8_filter   = x264_predict_8x8_filter_mmx2;
563 #if ARCH_X86
564     pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_mmx2;
565     pf[I_PRED_8x8_DDR]  = x264_predict_8x8_ddr_mmx2;
566     pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_mmx2;
567     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_mmx2;
568 #endif
569     if( !(cpu&X264_CPU_SSE2) )
570         return;
571     pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_sse2;
572     pf[I_PRED_8x8_VL]   = x264_predict_8x8_vl_sse2;
573     pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_sse2;
574     pf[I_PRED_8x8_DDR]  = x264_predict_8x8_ddr_sse2;
575     pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_sse2;
576     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_sse2;
577     if( !(cpu&X264_CPU_SSSE3) )
578         return;
579     if( !(cpu&X264_CPU_SLOW_PALIGNR) )
580     {
581         pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_ssse3;
582         pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_ssse3;
583     }
584     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_ssse3;
585     *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
586     if( !(cpu&X264_CPU_AVX) )
587         return;
588     pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_avx;
589     pf[I_PRED_8x8_DDR]  = x264_predict_8x8_ddr_avx;
590     pf[I_PRED_8x8_VL]   = x264_predict_8x8_vl_avx;
591     pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_avx;
592     pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_avx;
593 #endif // HIGH_BIT_DEPTH
594 }
595
596 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
597 {
598     if( !(cpu&X264_CPU_MMX2) )
599         return;
600     pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_mmx2;
601     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmx2;
602     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmx2;
603     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmx2;
604     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_mmx2;
605     pf[I_PRED_4x4_HU]  = x264_predict_4x4_hu_mmx2;
606 #if HIGH_BIT_DEPTH
607     if( !(cpu&X264_CPU_SSE2) )
608         return;
609     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
610     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
611     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_sse2;
612     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_sse2;
613     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_sse2;
614     if( !(cpu&X264_CPU_SSSE3) )
615         return;
616     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
617     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
618     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
619     if( !(cpu&X264_CPU_AVX) )
620         return;
621     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_avx;
622     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_avx;
623     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_avx;
624     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_avx;
625     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_avx;
626     if( !(cpu&X264_CPU_AVX2) )
627         return;
628     pf[I_PRED_4x4_H]  = x264_predict_4x4_h_avx2;
629 #else
630     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmx2;
631     if( !(cpu&X264_CPU_SSSE3) )
632         return;
633     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
634     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
635     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
636     if( cpu&X264_CPU_CACHELINE_64 )
637         pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3_cache64;
638 #endif // HIGH_BIT_DEPTH
639 }