]> git.sesse.net Git - x264/blob - common/x86/predict-c.c
Bump dates to 2011
[x264] / common / x86 / predict-c.c
1 /*****************************************************************************
2  * predict-c.c: intra prediction
3  *****************************************************************************
4  * Copyright (C) 2003-2011 x264 project
5  *
6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7  *          Loren Merritt <lorenm@u.washington.edu>
8  *          Fiona Glaser <fiona@x264.com>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23  *
24  * This program is also available under a commercial proprietary license.
25  * For more information, contact us at licensing@x264.com.
26  *****************************************************************************/
27
28 #include "common/common.h"
29 #include "predict.h"
30 #include "pixel.h"
31
/* Prototypes of the prediction kernels that the wrappers and init functions
 * in this file call or install.  None of them is defined in this file;
 * presumably they live in the x86 assembly sources -- confirm against the
 * build.
 *
 * NOTE(review): the parameter types mix `pixel`, `uint8_t` and `uint16_t`.
 * It looks like a `uint8_t` signature marks a kernel that is only used by the
 * 8-bit build and a `uint16_t` signature one that is only used when
 * HIGH_BIT_DEPTH is set -- confirm before reusing a kernel in the other build. */

/* 16x16 luma */
 void x264_predict_16x16_v_mmx( pixel *src );
 void x264_predict_16x16_v_sse2( pixel *src );
 void x264_predict_16x16_h_mmxext( pixel *src );
 void x264_predict_16x16_h_sse2( uint16_t *src );
 void x264_predict_16x16_h_ssse3( uint8_t *src );
 void x264_predict_16x16_dc_core_mmxext( pixel *src, int i_dc_left );
 void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left );
 void x264_predict_16x16_dc_left_core_mmxext( pixel *src, int i_dc_left );
 void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left );
 void x264_predict_16x16_dc_top_mmxext( pixel *src );
 void x264_predict_16x16_dc_top_sse2( pixel *src );
 void x264_predict_16x16_dc_top_ssse3( uint16_t *src );
 void x264_predict_16x16_p_core_mmxext( uint8_t *src, int i00, int b, int c );
 void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c );
/* 8x8 chroma */
 void x264_predict_8x8c_p_core_mmxext( uint8_t *src, int i00, int b, int c );
 void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c );
 void x264_predict_8x8c_dc_mmxext( pixel *src );
 void x264_predict_8x8c_dc_sse2( uint16_t *src );
 void x264_predict_8x8c_dc_top_mmxext( uint8_t *src );
 void x264_predict_8x8c_dc_top_sse2( uint16_t *src );
 void x264_predict_8x8c_v_mmx( pixel *src );
 void x264_predict_8x8c_v_sse2( uint16_t *src );
 void x264_predict_8x8c_h_mmxext( uint8_t *src );
 void x264_predict_8x8c_h_sse2( pixel *src );
 void x264_predict_8x8c_h_ssse3( uint8_t *src );
/* 8x8 luma: these take the 33-entry edge buffer in addition to the block */
 void x264_predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_ddl_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_ddr_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[33] );
 void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[33] );
 void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] );
 void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[33] );
 void x264_predict_8x8_vr_ssse3( uint16_t *src, uint16_t edge[33] );
 void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[33] );
 void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[33] );
 void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[33] );
 void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[33] );
 void x264_predict_8x8_filter_mmxext( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
 void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[33], int i_neighbor, int i_filters );
 void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[33], int i_neighbor, int i_filters );
/* 4x4 luma */
 void x264_predict_4x4_ddl_mmxext( pixel *src );
 void x264_predict_4x4_ddl_sse2( uint16_t *src );
 void x264_predict_4x4_ddr_mmxext( pixel *src );
 void x264_predict_4x4_vl_mmxext( pixel *src );
 void x264_predict_4x4_vl_sse2( uint16_t *src );
 void x264_predict_4x4_vr_mmxext( uint8_t *src );
 void x264_predict_4x4_vr_sse2( uint16_t *src );
 void x264_predict_4x4_vr_ssse3( pixel *src );
 void x264_predict_4x4_hd_mmxext( pixel *src );
 void x264_predict_4x4_hd_sse2( uint16_t *src );
 void x264_predict_4x4_hd_ssse3( pixel *src );
 void x264_predict_4x4_dc_mmxext( pixel *src );
 void x264_predict_4x4_ddr_sse2( uint16_t *src );
 void x264_predict_4x4_ddr_ssse3( pixel *src );
 void x264_predict_4x4_hu_mmxext( pixel *src );
99
100 #define PREDICT_16x16_DC(name)\
101 static void x264_predict_16x16_dc_##name( pixel *src )\
102 {\
103     uint32_t dc = 16;\
104     for( int i = 0; i < 16; i += 2 )\
105     {\
106         dc += src[-1 + i * FDEC_STRIDE];\
107         dc += src[-1 + (i+1) * FDEC_STRIDE];\
108     }\
109     x264_predict_16x16_dc_core_##name( src, dc );\
110 }
111
112 PREDICT_16x16_DC( mmxext )
113 PREDICT_16x16_DC( sse2 )
114
115 #define PREDICT_16x16_DC_LEFT(name)\
116 static void x264_predict_16x16_dc_left_##name( pixel *src )\
117 {\
118     uint32_t dc = 8;\
119     for( int i = 0; i < 16; i += 2 )\
120     {\
121         dc += src[-1 + i * FDEC_STRIDE];\
122         dc += src[-1 + (i+1) * FDEC_STRIDE];\
123     }\
124     x264_predict_16x16_dc_left_core_##name( src, dc>>4 );\
125 }
126
127 PREDICT_16x16_DC_LEFT( mmxext )
128 PREDICT_16x16_DC_LEFT( sse2 )
129
/* Adds one term of the plane-prediction gradients to the accumulators of the
 * enclosing function.  `src`, `H` and `V` must be in scope where it is used.
 *   H += i * (pixel i columns right of column j - pixel i columns left of it),
 *        both read from the row above the block;
 *   V += i * (pixel i rows below row j - pixel i rows above it),
 *        both read from the column left of the block.
 * Expands to two complete statements, so call sites add no semicolon.
 * Arguments are parenthesized so that expressions may be passed safely. */
#define PREDICT_P_SUM(j,i)\
    H += (i) * ( src[(j)+(i) - FDEC_STRIDE ]  - src[(j)-(i) - FDEC_STRIDE ] );\
    V += (i) * ( src[((j)+(i))*FDEC_STRIDE -1] - src[((j)-(i))*FDEC_STRIDE -1] );
133
/* Per-pixel weights that the inline asm in the plane predictors below
 * multiplies against the row of pixels above the block.
 * The pw_* tables hold 16-bit entries and are the pmaddwd operands of the
 * HIGH_BIT_DEPTH code paths; the pb_* tables hold the same weights as 8-bit
 * entries and are the pmaddubsw operands of the 8-bit code paths.
 * The 12345678 / m87654321 pair is used by the 16x16 predictor, m32101234 by
 * the 8x8 chroma predictor. */
ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
140
141 #if !HIGH_BIT_DEPTH
142 #define PREDICT_16x16_P(name)\
143 static void x264_predict_16x16_p_##name( pixel *src )\
144 {\
145     int a, b, c;\
146     int H = 0;\
147     int V = 0;\
148     int i00;\
149     PREDICT_P_SUM(7,1) \
150     PREDICT_P_SUM(7,2) \
151     PREDICT_P_SUM(7,3) \
152     PREDICT_P_SUM(7,4) \
153     PREDICT_P_SUM(7,5) \
154     PREDICT_P_SUM(7,6) \
155     PREDICT_P_SUM(7,7) \
156     PREDICT_P_SUM(7,8) \
157     a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
158     b = ( 5 * H + 32 ) >> 6;\
159     c = ( 5 * V + 32 ) >> 6;\
160     i00 = a - b * 7 - c * 7 + 16;\
161     x264_predict_16x16_p_core_##name( src, i00, b, c );\
162 }
163 #ifndef ARCH_X86_64
164 PREDICT_16x16_P( mmxext )
165 #endif
166 PREDICT_16x16_P( sse2   )
167 #endif //!HIGH_BIT_DEPTH
168
#ifdef __GNUC__
/* 16x16 plane prediction front end that computes the horizontal gradient H
 * with inline asm (hence GNU C only).  The same body is compiled as
 * x264_predict_16x16_p_sse2() when HIGH_BIT_DEPTH is set and as
 * x264_predict_16x16_p_ssse3() otherwise.
 *
 * src points at the top-left pixel of the block; the row above it and the
 * column left of it are read.  The prediction itself is written by
 * x264_predict_16x16_p_core_sse2(), or by x264_predict_16x16_p_c() in the
 * overflow case described at the end of the function.
 *
 * NOTE(review): neither asm statement lists the xmm/mm registers it
 * overwrites as clobbers -- confirm this is safe for the compilers in use. */
#if HIGH_BIT_DEPTH
static void x264_predict_16x16_p_sse2( uint16_t *src )
#else
static void x264_predict_16x16_p_ssse3( uint8_t *src )
#endif
{
    int a, b, c, i00;
    int H, V;
#if HIGH_BIT_DEPTH
    /* %1 is the first pixel of the row above the block.
     * xmm1 <- 8 pixels starting one pixel to the left of it (byte offset -2),
     * xmm0 <- 8 pixels starting at byte offset 16 (the ninth pixel of the row).
     * pmaddwd applies the weights 1..8 to xmm0 and -8..-1 to xmm1; the
     * remaining instructions add all 32-bit partial sums together and the
     * total is moved into H. */
    asm (
        "movdqu        -2+%1, %%xmm1 \n"
        "movdqa        16+%1, %%xmm0 \n"
        "pmaddwd          %2, %%xmm0 \n"
        "pmaddwd          %3, %%xmm1 \n"
        "paddd        %%xmm1, %%xmm0 \n"
        "movhlps      %%xmm0, %%xmm1 \n"
        "paddd        %%xmm1, %%xmm0 \n"
        "pshuflw $14, %%xmm0, %%xmm1 \n"
        "paddd        %%xmm1, %%xmm0 \n"
        "movd         %%xmm0, %0     \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pw_12345678), "m"(*pw_m87654321)
    );
#else
    /* Same computation on 8-bit pixels using 64-bit MMX registers.
     * mm0 <- the 8 pixels at byte offset 8 of the row above the block;
     * mm1 <- after palignr, the 8 pixels starting one byte left of the row.
     * pmaddubsw applies the weights 1..8 to mm0 and -8..-1 to mm1, giving
     * 16-bit partial sums that are then added together.  The final movsx
     * sign-extends the low 16 bits of the result register into H. */
    asm (
        "movq           %1, %%mm1 \n"
        "movq         8+%1, %%mm0 \n"
        "palignr $7, -8+%1, %%mm1 \n"
        "pmaddubsw      %2, %%mm0 \n"
        "pmaddubsw      %3, %%mm1 \n"
        "paddw       %%mm1, %%mm0 \n"
        "pshufw $14, %%mm0, %%mm1 \n"
        "paddw       %%mm1, %%mm0 \n"
        "pshufw  $1, %%mm0, %%mm1 \n"
        "paddw       %%mm1, %%mm0 \n"
        "movd        %%mm0, %0    \n"
        "movsx         %w0, %0    \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321)
    );
#endif
    /* Vertical gradient, computed in C from the column left of the block:
     * weight k multiplies the difference between the pixels k rows below and
     * k rows above row 7 (row -1 is the row above the block). */
    V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )
      + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )
      + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )
      + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )
      + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )
      + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )
      + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )
      + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
    /* Same a / b / c / i00 formulas as the PREDICT_16x16_P wrappers. */
    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );
    b = ( 5 * H + 32 ) >> 6;
    c = ( 5 * V + 32 ) >> 6;
    i00 = a - b * 7 - c * 7 + 16;
    /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case
     * than to try to consider it in the asm. */
    /* The C fallback can only be selected when BIT_DEPTH > 8. */
    if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) )
        x264_predict_16x16_p_c( src );
    else
        x264_predict_16x16_p_core_sse2( src, i00, b, c );
}
#endif
231
232 #if !HIGH_BIT_DEPTH
233
234 #define PREDICT_8x8_P(name)\
235 static void x264_predict_8x8c_p_##name( uint8_t *src )\
236 {\
237     int a, b, c;\
238     int H = 0;\
239     int V = 0;\
240     int i00;\
241     PREDICT_P_SUM(3,1)\
242     PREDICT_P_SUM(3,2)\
243     PREDICT_P_SUM(3,3)\
244     PREDICT_P_SUM(3,4)\
245     a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
246     b = ( 17 * H + 16 ) >> 5;\
247     c = ( 17 * V + 16 ) >> 5;\
248     i00 = a -3*b -3*c + 16;\
249     x264_predict_8x8c_p_core_##name( src, i00, b, c );\
250 }
251 #ifndef ARCH_X86_64
252 PREDICT_8x8_P( mmxext )
253 #endif
254 PREDICT_8x8_P( sse2   )
255
256 #endif //!HIGH_BIT_DEPTH
257
#ifdef __GNUC__
/* 8x8 chroma plane prediction front end that computes most of the horizontal
 * gradient H with inline asm (hence GNU C only).  The same body is compiled
 * as x264_predict_8x8c_p_sse2() when HIGH_BIT_DEPTH is set and as
 * x264_predict_8x8c_p_ssse3() otherwise.
 *
 * src points at the top-left pixel of the block; the row above it and the
 * column left of it are read.  The prediction itself is written by
 * x264_predict_8x8c_p_core_sse2(), or by x264_predict_8x8c_p_c() in the
 * overflow case described at the end of the function.
 *
 * NOTE(review): neither asm statement lists the xmm/mm registers it
 * overwrites as clobbers -- confirm this is safe for the compilers in use. */
#if HIGH_BIT_DEPTH
static void x264_predict_8x8c_p_sse2( uint16_t *src )
#else
static void x264_predict_8x8c_p_ssse3( uint8_t *src )
#endif
{
    int a, b, c, i00;
    int H, V;
#if HIGH_BIT_DEPTH
    /* xmm0 <- the 8 pixels of the row above the block; pmaddwd applies the
     * weights -3..4 to them and the following instructions add the 32-bit
     * partial sums together before the total is moved into H. */
    asm (
        "movdqa           %1, %%xmm0 \n"
        "pmaddwd          %2, %%xmm0 \n"
        "movhlps      %%xmm0, %%xmm1 \n"
        "paddd        %%xmm1, %%xmm0 \n"
        "pshuflw $14, %%xmm0, %%xmm1 \n"
        "paddd        %%xmm1, %%xmm0 \n"
        "movd         %%xmm0, %0     \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)
    );
#else
    /* Same computation on 8-bit pixels in a 64-bit MMX register: pmaddubsw
     * applies the weights -3..4, the 16-bit partial sums are added together,
     * and the final movsx sign-extends the low 16 bits of the result into H. */
    asm (
        "movq           %1, %%mm0 \n"
        "pmaddubsw      %2, %%mm0 \n"
        "pshufw $14, %%mm0, %%mm1 \n"
        "paddw       %%mm1, %%mm0 \n"
        "pshufw  $1, %%mm0, %%mm1 \n"
        "paddw       %%mm1, %%mm0 \n"
        "movd        %%mm0, %0    \n"
        "movsx         %w0, %0    \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
    );
#endif
    /* Vertical gradient, computed in C from the column left of the block:
     * weight k multiplies the difference between the pixels k rows below and
     * k rows above row 3 (row -1 is the row above the block). */
    V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )
      + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )
      + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )
      + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );
    /* The asm only covers the 8 pixels above the block; the pixel above-left
     * of the block carries weight -4 and is added here. */
    H += -4 * src[-1*FDEC_STRIDE -1];
    /* Same a / b / c / i00 formulas as the PREDICT_8x8_P wrappers. */
    a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );
    b = ( 17 * H + 16 ) >> 5;
    c = ( 17 * V + 16 ) >> 5;
    i00 = a -3*b -3*c + 16;
    /* b*7 + c*7 can overflow: it's easier to just branch away in this rare case
     * than to try to consider it in the asm. */
    /* The C fallback can only be selected when BIT_DEPTH > 8. */
    if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 2340 || abs(c) > 2340) )
        x264_predict_8x8c_p_c( src );
    else
        x264_predict_8x8c_p_core_sse2( src, i00, b, c );
}
#endif
310 #if !HIGH_BIT_DEPTH
#if ARCH_X86_64
/* 8x8 chroma DC prediction from the left neighbours only.
 * Rows 0-3 are filled with the rounded mean of the four pixels left of them,
 * rows 4-7 with the rounded mean of the four pixels left of those rows.
 * Each row is written with a single 64-bit store through M64(). */
static void x264_predict_8x8c_dc_left( uint8_t *src )
{
    uint32_t sum_upper = 0, sum_lower = 0;

    for( int row = 0; row < 4; row++ )
    {
        sum_upper += src[row * FDEC_STRIDE - 1];
        sum_lower += src[(row + 4) * FDEC_STRIDE - 1];
    }

    /* (sum + 2) >> 2 is the rounded mean; the multiply copies that byte into
     * all eight byte lanes of the 64-bit word. */
    const uint64_t fill_upper = (( sum_upper + 2 ) >> 2) * 0x0101010101010101ULL;
    const uint64_t fill_lower = (( sum_lower + 2 ) >> 2) * 0x0101010101010101ULL;

    for( int row = 0; row < 8; row++ )
        M64( src + row * FDEC_STRIDE ) = row < 4 ? fill_upper : fill_lower;
}
#endif
339
/* Helpers that copy edge samples into named int locals.  `edge` must be in
 * scope where they are used.
 *   PL(n) declares l<n> and initialises it from edge[14-n];
 *   PT(n) declares t<n> and initialises it from edge[16+n].
 * Every local is tagged UNUSED, so a user need not read all of them.
 * PREDICT_8x8_LOAD_LEFT / PREDICT_8x8_LOAD_TOP declare l0..l7 / t0..t7. */
#define PL(n) UNUSED int l##n = edge[14-n];
#define PT(n) UNUSED int t##n = edge[16+n];
#define PREDICT_8x8_LOAD_LEFT \
    PL(0) PL(1) PL(2) PL(3) \
    PL(4) PL(5) PL(6) PL(7)
#define PREDICT_8x8_LOAD_TOP \
    PT(0) PT(1) PT(2) PT(3) \
    PT(4) PT(5) PT(6) PT(7)
348
/* One in-place butterfly on the pair (x,y): x becomes x+y and y becomes
 * y minus the old x.  Uses the scratch variable `t` of the enclosing scope. */
#define SUMSUB_PAIR(x,y) t=x; x+=y; y-=t;
/* Four independent butterflies on (a,b), (c,d), (e,f) and (g,h).
 * Expands to complete statements; `t` must be declared by the user. */
#define SUMSUB(a,b,c,d,e,f,g,h)\
    SUMSUB_PAIR(a,b)\
    SUMSUB_PAIR(c,d)\
    SUMSUB_PAIR(e,f)\
    SUMSUB_PAIR(g,h)
354
355 #define INTRA_SA8D_X3(cpu)\
356 void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
357 {\
358     PREDICT_8x8_LOAD_TOP\
359     PREDICT_8x8_LOAD_LEFT\
360     int t;\
361     ALIGNED_16( int16_t sa8d_1d[2][8] );\
362     SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
363     SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
364     SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
365     sa8d_1d[0][0] = l0;\
366     sa8d_1d[0][1] = l1;\
367     sa8d_1d[0][2] = l2;\
368     sa8d_1d[0][3] = l3;\
369     sa8d_1d[0][4] = l4;\
370     sa8d_1d[0][5] = l5;\
371     sa8d_1d[0][6] = l6;\
372     sa8d_1d[0][7] = l7;\
373     SUMSUB(t0,t4,t1,t5,t2,t6,t3,t7);\
374     SUMSUB(t0,t2,t1,t3,t4,t6,t5,t7);\
375     SUMSUB(t0,t1,t2,t3,t4,t5,t6,t7);\
376     sa8d_1d[1][0] = t0;\
377     sa8d_1d[1][1] = t1;\
378     sa8d_1d[1][2] = t2;\
379     sa8d_1d[1][3] = t3;\
380     sa8d_1d[1][4] = t4;\
381     sa8d_1d[1][5] = t5;\
382     sa8d_1d[1][6] = t6;\
383     sa8d_1d[1][7] = t7;\
384     x264_intra_sa8d_x3_8x8_core_##cpu( fenc, sa8d_1d, res );\
385 }
386
387 #if ARCH_X86_64
388 INTRA_SA8D_X3(sse2)
389 INTRA_SA8D_X3(ssse3)
390 #else
391 INTRA_SA8D_X3(mmxext)
392 #endif
393 #endif // !HIGH_BIT_DEPTH
394
395 /****************************************************************************
396  * Exported functions:
397  ****************************************************************************/
398 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
399 {
400     if( !(cpu&X264_CPU_MMX) )
401         return;
402     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx;
403     if( cpu&X264_CPU_MMXEXT )
404     {
405         pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_mmxext;
406         pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_mmxext;
407         pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmxext;
408         pf[I_PRED_16x16_H]       = x264_predict_16x16_h_mmxext;
409     }
410 #if HIGH_BIT_DEPTH
411     if( !(cpu&X264_CPU_SSE2) )
412         return;
413     pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
414     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
415     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
416     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse2;
417     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_sse2;
418     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
419 #else
420 #if !ARCH_X86_64
421     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmxext;
422 #endif
423     if( !(cpu&X264_CPU_SSE2) )
424         return;
425     pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
426     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse2;
427     if( cpu&X264_CPU_SSE2_IS_SLOW )
428         return;
429     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
430     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
431     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
432     if( !(cpu&X264_CPU_SSSE3) )
433         return;
434     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_ssse3;
435 #ifdef __GNUC__
436     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
437 #endif
438 #endif // HIGH_BIT_DEPTH
439 }
440
441 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
442 {
443     if( !(cpu&X264_CPU_MMX) )
444         return;
445 #if HIGH_BIT_DEPTH
446     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
447     if( !(cpu&X264_CPU_MMXEXT) )
448         return;
449     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmxext;
450     if( !(cpu&X264_CPU_SSE2) )
451         return;
452     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_sse2;
453     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_sse2;
454     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_sse2;
455     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_sse2;
456     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
457 #else
458 #if ARCH_X86_64
459     pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
460 #endif
461     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
462     if( !(cpu&X264_CPU_MMXEXT) )
463         return;
464     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmxext;
465     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmxext;
466 #if !ARCH_X86_64
467     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmxext;
468 #endif
469     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmxext;
470     if( !(cpu&X264_CPU_SSE2) )
471         return;
472     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
473     if( !(cpu&X264_CPU_SSSE3) )
474         return;
475     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_ssse3;
476 #ifdef __GNUC__
477     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
478 #endif
479 #endif // HIGH_BIT_DEPTH
480 }
481
482 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
483 {
484     if( !(cpu&X264_CPU_MMXEXT) )
485         return;
486 #if HIGH_BIT_DEPTH
487     if( !(cpu&X264_CPU_SSE2) )
488         return;
489     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_sse2;
490     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_sse2;
491     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_sse2;
492     pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
493     pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
494     pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
495     pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
496     pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
497     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
498     pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
499     *predict_8x8_filter   = x264_predict_8x8_filter_sse2;
500     if( !(cpu&X264_CPU_SSSE3) )
501         return;
502     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_ssse3;
503     pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
504     pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_ssse3;
505     *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
506 #else
507     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmxext;
508     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmxext;
509     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmxext;
510     pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmxext;
511     pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmxext;
512     pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_mmxext;
513     *predict_8x8_filter   = x264_predict_8x8_filter_mmxext;
514 #if ARCH_X86
515     pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_mmxext;
516     pf[I_PRED_8x8_DDR]  = x264_predict_8x8_ddr_mmxext;
517     pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_mmxext;
518     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_mmxext;
519 #endif
520     if( !(cpu&X264_CPU_SSE2) )
521         return;
522     pf[I_PRED_8x8_DDL]  = x264_predict_8x8_ddl_sse2;
523     pf[I_PRED_8x8_VL]   = x264_predict_8x8_vl_sse2;
524     pf[I_PRED_8x8_VR]   = x264_predict_8x8_vr_sse2;
525     pf[I_PRED_8x8_DDR]  = x264_predict_8x8_ddr_sse2;
526     pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_sse2;
527     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_sse2;
528     if( !(cpu&X264_CPU_SSSE3) )
529         return;
530     pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_ssse3;
531     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_ssse3;
532     *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
533 #endif // HIGH_BIT_DEPTH
534 }
535
536 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
537 {
538     if( !(cpu&X264_CPU_MMXEXT) )
539         return;
540     pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_mmxext;
541     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
542     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmxext;
543     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmxext;
544     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_mmxext;
545     pf[I_PRED_4x4_HU]  = x264_predict_4x4_hu_mmxext;
546 #if HIGH_BIT_DEPTH
547     if( !(cpu&X264_CPU_SSE2) )
548         return;
549     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
550     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
551     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_sse2;
552     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_sse2;
553     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_sse2;
554 #else
555     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmxext;
556 #endif // HIGH_BIT_DEPTH
557     if( !(cpu&X264_CPU_SSSE3) )
558         return;
559     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
560     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
561     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
562 }