1 /*****************************************************************************
2 * dct.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Zigzag-ordered copies of the squared dct weight tables, filled in by
 * x264_dct_init_weights().  Index [0] is frame scan, [1] is field scan. */
int x264_dct4_weight2_zigzag[2][16];
int x264_dct8_weight2_zigzag[2][64];
/* XXX For all dct dc functions: the input may alias the output, so they must
 * operate correctly in-place. */
/* 2D 4x4 Hadamard transform of the DC coefficients (intra 16x16 luma DC).
 * Operates in-place: d is both input and output. */
static void dct4x4dc( int16_t d[4][4] )
{
    int16_t tmp[4][4];
    int s01, s23;
    int d01, d23;
    int i;

    /* Horizontal pass: butterflies on each row, result transposed into tmp. */
    for( i = 0; i < 4; i++ )
    {
        s01 = d[i][0] + d[i][1];
        d01 = d[i][0] - d[i][1];
        s23 = d[i][2] + d[i][3];
        d23 = d[i][2] - d[i][3];

        tmp[0][i] = s01 + s23;
        tmp[1][i] = s01 - s23;
        tmp[2][i] = d01 - d23;
        tmp[3][i] = d01 + d23;
    }

    /* Vertical pass: same butterflies, with +1 rounding before the >>1 scale. */
    for( i = 0; i < 4; i++ )
    {
        s01 = tmp[i][0] + tmp[i][1];
        d01 = tmp[i][0] - tmp[i][1];
        s23 = tmp[i][2] + tmp[i][3];
        d23 = tmp[i][2] - tmp[i][3];

        d[i][0] = ( s01 + s23 + 1 ) >> 1;
        d[i][1] = ( s01 - s23 + 1 ) >> 1;
        d[i][2] = ( d01 - d23 + 1 ) >> 1;
        d[i][3] = ( d01 + d23 + 1 ) >> 1;
    }
}
/* Inverse 2D 4x4 Hadamard transform of the DC coefficients.  In-place.
 * No rounding here: scaling is folded into the subsequent dequant. */
static void idct4x4dc( int16_t d[4][4] )
{
    int16_t tmp[4][4];
    int s01, s23;
    int d01, d23;
    int i;

    for( i = 0; i < 4; i++ )
    {
        s01 = d[i][0] + d[i][1];
        d01 = d[i][0] - d[i][1];
        s23 = d[i][2] + d[i][3];
        d23 = d[i][2] - d[i][3];

        tmp[0][i] = s01 + s23;
        tmp[1][i] = s01 - s23;
        tmp[2][i] = d01 - d23;
        tmp[3][i] = d01 + d23;
    }

    for( i = 0; i < 4; i++ )
    {
        s01 = tmp[i][0] + tmp[i][1];
        d01 = tmp[i][0] - tmp[i][1];
        s23 = tmp[i][2] + tmp[i][3];
        d23 = tmp[i][2] - tmp[i][3];

        /* Store the second-pass butterflies back into d (the truncated
         * original computed them but never wrote the results). */
        d[i][0] = s01 + s23;
        d[i][1] = s01 - s23;
        d[i][2] = d01 - d23;
        d[i][3] = d01 + d23;
    }
}
/* Compute the i_size x i_size residual pix1 - pix2 into diff (row-major).
 * i_pix1/i_pix2 are the strides of the two pixel planes. */
static inline void pixel_sub_wxh( int16_t *diff, int i_size,
                                  uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int y, x;
    for( y = 0; y < i_size; y++ )
    {
        for( x = 0; x < i_size; x++ )
        {
            diff[x + y*i_size] = pix1[x] - pix2[x];
        }
        /* advance both source rows by their respective strides */
        pix1 += i_pix1;
        pix2 += i_pix2;
    }
}
122 static void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, uint8_t *pix2 )
128 pixel_sub_wxh( (int16_t*)d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
130 for( i = 0; i < 4; i++ )
132 const int s03 = d[i][0] + d[i][3];
133 const int s12 = d[i][1] + d[i][2];
134 const int d03 = d[i][0] - d[i][3];
135 const int d12 = d[i][1] - d[i][2];
137 tmp[0][i] = s03 + s12;
138 tmp[1][i] = 2*d03 + d12;
139 tmp[2][i] = s03 - s12;
140 tmp[3][i] = d03 - 2*d12;
143 for( i = 0; i < 4; i++ )
145 const int s03 = tmp[i][0] + tmp[i][3];
146 const int s12 = tmp[i][1] + tmp[i][2];
147 const int d03 = tmp[i][0] - tmp[i][3];
148 const int d12 = tmp[i][1] - tmp[i][2];
150 dct[i][0] = s03 + s12;
151 dct[i][1] = 2*d03 + d12;
152 dct[i][2] = s03 - s12;
153 dct[i][3] = d03 - 2*d12;
157 static void sub8x8_dct( int16_t dct[4][4][4], uint8_t *pix1, uint8_t *pix2 )
159 sub4x4_dct( dct[0], &pix1[0], &pix2[0] );
160 sub4x4_dct( dct[1], &pix1[4], &pix2[4] );
161 sub4x4_dct( dct[2], &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] );
162 sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
165 static void sub16x16_dct( int16_t dct[16][4][4], uint8_t *pix1, uint8_t *pix2 )
167 sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] );
168 sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] );
169 sub8x8_dct( &dct[ 8], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
170 sub8x8_dct( &dct[12], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
174 static void add4x4_idct( uint8_t *p_dst, int16_t dct[4][4] )
181 for( i = 0; i < 4; i++ )
183 const int s02 = dct[0][i] + dct[2][i];
184 const int d02 = dct[0][i] - dct[2][i];
185 const int s13 = dct[1][i] + (dct[3][i]>>1);
186 const int d13 = (dct[1][i]>>1) - dct[3][i];
188 tmp[i][0] = s02 + s13;
189 tmp[i][1] = d02 + d13;
190 tmp[i][2] = d02 - d13;
191 tmp[i][3] = s02 - s13;
194 for( i = 0; i < 4; i++ )
196 const int s02 = tmp[0][i] + tmp[2][i];
197 const int d02 = tmp[0][i] - tmp[2][i];
198 const int s13 = tmp[1][i] + (tmp[3][i]>>1);
199 const int d13 = (tmp[1][i]>>1) - tmp[3][i];
201 d[0][i] = ( s02 + s13 + 32 ) >> 6;
202 d[1][i] = ( d02 + d13 + 32 ) >> 6;
203 d[2][i] = ( d02 - d13 + 32 ) >> 6;
204 d[3][i] = ( s02 - s13 + 32 ) >> 6;
208 for( y = 0; y < 4; y++ )
210 for( x = 0; x < 4; x++ )
212 p_dst[x] = x264_clip_uint8( p_dst[x] + d[y][x] );
214 p_dst += FDEC_STRIDE;
218 static void add8x8_idct( uint8_t *p_dst, int16_t dct[4][4][4] )
220 add4x4_idct( &p_dst[0], dct[0] );
221 add4x4_idct( &p_dst[4], dct[1] );
222 add4x4_idct( &p_dst[4*FDEC_STRIDE+0], dct[2] );
223 add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] );
226 static void add16x16_idct( uint8_t *p_dst, int16_t dct[16][4][4] )
228 add8x8_idct( &p_dst[0], &dct[0] );
229 add8x8_idct( &p_dst[8], &dct[4] );
230 add8x8_idct( &p_dst[8*FDEC_STRIDE+0], &dct[8] );
231 add8x8_idct( &p_dst[8*FDEC_STRIDE+8], &dct[12] );
/****************************************************************************
 * 8x8 transform:
 ****************************************************************************/
/* One 8-point forward DCT pass.  SRC(x)/DST(x) are macros supplied by the
 * caller, so the same body serves both the row and the column pass. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0) =  a0 + a1     ;\
    DST(1) =  a4 + (a7>>2);\
    DST(2) =  a2 + (a3>>1);\
    DST(3) =  a5 + (a6>>2);\
    DST(4) =  a0 - a1     ;\
    DST(5) =  a6 - (a5>>2);\
    DST(6) = (a2>>1) - a3 ;\
    DST(7) = (a4>>2) - a7 ;\
}
265 static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
270 pixel_sub_wxh( (int16_t*)tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
272 #define SRC(x) tmp[x][i]
273 #define DST(x) tmp[x][i]
274 for( i = 0; i < 8; i++ )
279 #define SRC(x) tmp[i][x]
280 #define DST(x) dct[x][i]
281 for( i = 0; i < 8; i++ )
287 static void sub16x16_dct8( int16_t dct[4][8][8], uint8_t *pix1, uint8_t *pix2 )
289 sub8x8_dct8( dct[0], &pix1[0], &pix2[0] );
290 sub8x8_dct8( dct[1], &pix1[8], &pix2[8] );
291 sub8x8_dct8( dct[2], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
292 sub8x8_dct8( dct[3], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
/* One 8-point inverse DCT pass.  SRC(x) reads an input coefficient;
 * DST(x,rhs) stores an output value (the caller's DST supplies the `;`-free
 * assignment target, so each use below ends with an explicit `;`). */
#define IDCT8_1D {\
    const int a0 =  SRC(0) + SRC(4);\
    const int a2 =  SRC(0) - SRC(4);\
    const int a4 = (SRC(2)>>1) - SRC(6);\
    const int a6 = (SRC(6)>>1) + SRC(2);\
    const int b0 = a0 + a6;\
    const int b2 = a2 + a4;\
    const int b4 = a2 - a4;\
    const int b6 = a0 - a6;\
    const int a1 = -SRC(3) + SRC(5) - SRC(7) - (SRC(7)>>1);\
    const int a3 =  SRC(1) + SRC(7) - SRC(3) - (SRC(3)>>1);\
    const int a5 = -SRC(1) + SRC(7) + SRC(5) + (SRC(5)>>1);\
    const int a7 =  SRC(3) + SRC(5) + SRC(1) + (SRC(1)>>1);\
    const int b1 = (a7>>2) + a1;\
    const int b3 =  a3 + (a5>>2);\
    const int b5 = (a3>>2) - a5;\
    const int b7 =  a7 - (a1>>2);\
    DST(0, b0 + b7);\
    DST(1, b2 + b5);\
    DST(2, b4 + b3);\
    DST(3, b6 + b1);\
    DST(4, b6 - b1);\
    DST(5, b4 - b3);\
    DST(6, b2 - b5);\
    DST(7, b0 - b7);\
}
322 static void add8x8_idct8( uint8_t *dst, int16_t dct[8][8] )
326 dct[0][0] += 32; // rounding for the >>6 at the end
328 #define SRC(x) dct[x][i]
329 #define DST(x,rhs) dct[x][i] = (rhs)
330 for( i = 0; i < 8; i++ )
335 #define SRC(x) dct[i][x]
336 #define DST(x,rhs) dst[i + x*FDEC_STRIDE] = x264_clip_uint8( dst[i + x*FDEC_STRIDE] + ((rhs) >> 6) );
337 for( i = 0; i < 8; i++ )
343 static void add16x16_idct8( uint8_t *dst, int16_t dct[4][8][8] )
345 add8x8_idct8( &dst[0], dct[0] );
346 add8x8_idct8( &dst[8], dct[1] );
347 add8x8_idct8( &dst[8*FDEC_STRIDE+0], dct[2] );
348 add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
/****************************************************************************
 * x264_dct_init:
 ****************************************************************************/
355 void x264_dct_init( int cpu, x264_dct_function_t *dctf )
357 dctf->sub4x4_dct = sub4x4_dct;
358 dctf->add4x4_idct = add4x4_idct;
360 dctf->sub8x8_dct = sub8x8_dct;
361 dctf->add8x8_idct = add8x8_idct;
363 dctf->sub16x16_dct = sub16x16_dct;
364 dctf->add16x16_idct = add16x16_idct;
366 dctf->sub8x8_dct8 = sub8x8_dct8;
367 dctf->add8x8_idct8 = add8x8_idct8;
369 dctf->sub16x16_dct8 = sub16x16_dct8;
370 dctf->add16x16_idct8 = add16x16_idct8;
372 dctf->dct4x4dc = dct4x4dc;
373 dctf->idct4x4dc = idct4x4dc;
376 if( cpu&X264_CPU_MMX )
378 dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
379 dctf->add4x4_idct = x264_add4x4_idct_mmx;
380 dctf->dct4x4dc = x264_dct4x4dc_mmx;
381 dctf->idct4x4dc = x264_idct4x4dc_mmx;
384 dctf->sub8x8_dct = x264_sub8x8_dct_mmx;
385 dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
386 dctf->add8x8_idct = x264_add8x8_idct_mmx;
387 dctf->add16x16_idct = x264_add16x16_idct_mmx;
389 dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
390 dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
391 dctf->add8x8_idct8 = x264_add8x8_idct8_mmx;
392 dctf->add16x16_idct8= x264_add16x16_idct8_mmx;
396 if( cpu&X264_CPU_SSE2 )
398 dctf->sub8x8_dct8 = x264_sub8x8_dct8_sse2;
399 dctf->sub16x16_dct8 = x264_sub16x16_dct8_sse2;
400 dctf->add8x8_idct8 = x264_add8x8_idct8_sse2;
401 dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
403 dctf->sub8x8_dct = x264_sub8x8_dct_sse2;
404 dctf->sub16x16_dct = x264_sub16x16_dct_sse2;
405 dctf->add8x8_idct = x264_add8x8_idct_sse2;
406 dctf->add16x16_idct = x264_add16x16_idct_sse2;
411 if( cpu&X264_CPU_ALTIVEC )
413 dctf->sub4x4_dct = x264_sub4x4_dct_altivec;
414 dctf->sub8x8_dct = x264_sub8x8_dct_altivec;
415 dctf->sub16x16_dct = x264_sub16x16_dct_altivec;
417 dctf->add4x4_idct = x264_add4x4_idct_altivec;
418 dctf->add8x8_idct = x264_add8x8_idct_altivec;
419 dctf->add16x16_idct = x264_add16x16_idct_altivec;
421 dctf->sub8x8_dct8 = x264_sub8x8_dct8_altivec;
422 dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
424 dctf->add8x8_idct8 = x264_add8x8_idct8_altivec;
425 dctf->add16x16_idct8= x264_add16x16_idct8_altivec;
430 void x264_dct_init_weights( void )
435 for( i=0; i<16; i++ )
436 x264_dct4_weight2_zigzag[j][i] = x264_dct4_weight2_tab[ x264_zigzag_scan4[j][i] ];
437 for( i=0; i<64; i++ )
438 x264_dct8_weight2_zigzag[j][i] = x264_dct8_weight2_tab[ x264_zigzag_scan8[j][i] ];
// gcc pessimizes multi-dimensional arrays here, even with constant indices
/* Scan step for 8x8 blocks: level[i] takes the coefficient at column-major
 * flat index x*8+y of the dct block. */
#define ZIG(i,y,x) level[i] = dct[0][x*8+y];
/* 8x8 frame (progressive) zigzag scan order, as (index, y, x) triples.
 * The final line must NOT end in a backslash, or the following #define
 * would be absorbed into this macro. */
#define ZIGZAG8_FRAME\
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)\
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)\
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)\
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)\
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)\
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)\
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)\
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)\
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)\
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)\
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)\
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)\
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)\
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
/* 8x8 field (interlaced) zigzag scan order, as (index, y, x) triples. */
#define ZIGZAG8_FIELD\
    ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)\
    ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)\
    ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)\
    ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)\
    ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)\
    ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)\
    ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)\
    ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)\
    ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)\
    ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)\
    ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)\
    ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)\
    ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)\
    ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)\
    ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)\
    ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
/* 4x4 frame (progressive) zigzag scan order. */
#define ZIGZAG4_FRAME\
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)\
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
/* 4x4 field (interlaced) zigzag scan order. */
#define ZIGZAG4_FIELD\
    ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)\
    ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)\
    ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\
    ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
493 static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
498 static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
504 #define ZIG(i,y,x) level[i] = dct[0][x*4+y];
506 static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
/* Scan a 4x4 coefficient block into level[] in field zigzag order.
 * Only coefficients 2-5 are actually reordered; the head (0-1) and tail
 * (6-15) of the field scan are already in raster order.  The original used
 * uint32_t/uint64_t pointer punning for the straight runs, which violates
 * the C aliasing rules; plain element copies are equivalent and the
 * compiler merges them into the same wide loads/stores. */
static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
{
    const int16_t *src = dct[0];
    int i;

    level[0] = src[0];
    level[1] = src[1];
    level[2] = src[4];  /* ZIG(2,0,1) */
    level[3] = src[2];  /* ZIG(3,2,0) */
    level[4] = src[3];  /* ZIG(4,3,0) */
    level[5] = src[5];  /* ZIG(5,1,1) */
    for( i = 6; i < 16; i++ )
        level[i] = src[i];
}
/* Redefine the scan step for the zigzag_sub_* functions: compute the
 * residual p_src - p_dst directly while scanning (oe/od are the offsets
 * into the encode and decode planes). */
#undef ZIG
#define ZIG(i,y,x) {\
    int oe = x+y*FENC_STRIDE;\
    int od = x+y*FDEC_STRIDE;\
    level[i] = p_src[oe] - p_dst[od];\
}
/* Copy the 4x4 source block into the reconstruction plane, one row (4 bytes)
 * at a time.  NOTE(review): the uint32_t punning assumes the platform
 * tolerates this aliasing/alignment, as the rest of the file does. */
#define COPY4x4\
    *(uint32_t*)(p_dst+0*FDEC_STRIDE) = *(uint32_t*)(p_src+0*FENC_STRIDE);\
    *(uint32_t*)(p_dst+1*FDEC_STRIDE) = *(uint32_t*)(p_src+1*FENC_STRIDE);\
    *(uint32_t*)(p_dst+2*FDEC_STRIDE) = *(uint32_t*)(p_src+2*FENC_STRIDE);\
    *(uint32_t*)(p_dst+3*FDEC_STRIDE) = *(uint32_t*)(p_src+3*FENC_STRIDE);
/* Copy the 8x8 source block into the reconstruction plane, one row (8 bytes)
 * at a time.  Same punning caveat as COPY4x4. */
#define COPY8x8\
    *(uint64_t*)(p_dst+0*FDEC_STRIDE) = *(uint64_t*)(p_src+0*FENC_STRIDE);\
    *(uint64_t*)(p_dst+1*FDEC_STRIDE) = *(uint64_t*)(p_src+1*FENC_STRIDE);\
    *(uint64_t*)(p_dst+2*FDEC_STRIDE) = *(uint64_t*)(p_src+2*FENC_STRIDE);\
    *(uint64_t*)(p_dst+3*FDEC_STRIDE) = *(uint64_t*)(p_src+3*FENC_STRIDE);\
    *(uint64_t*)(p_dst+4*FDEC_STRIDE) = *(uint64_t*)(p_src+4*FENC_STRIDE);\
    *(uint64_t*)(p_dst+5*FDEC_STRIDE) = *(uint64_t*)(p_src+5*FENC_STRIDE);\
    *(uint64_t*)(p_dst+6*FDEC_STRIDE) = *(uint64_t*)(p_src+6*FENC_STRIDE);\
    *(uint64_t*)(p_dst+7*FDEC_STRIDE) = *(uint64_t*)(p_src+7*FENC_STRIDE);
541 static void zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
547 static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
553 static void zigzag_sub_8x8_frame( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
558 static void zigzag_sub_8x8_field( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
/* De-interleave an 8x8 block's 64 scanned coefficients into four 4x4-sized
 * runs of 16 for CAVLC: dst block i takes every 4th coefficient starting
 * at offset i. */
static void zigzag_interleave_8x8_cavlc( int16_t *dst, int16_t *src )
{
    int i, j;
    for( i = 0; i < 4; i++ )
        for( j = 0; j < 16; j++ )
            dst[i*16+j] = src[i+j*4];
}
575 void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
579 pf->scan_8x8 = zigzag_scan_8x8_field;
580 pf->scan_4x4 = zigzag_scan_4x4_field;
581 pf->sub_8x8 = zigzag_sub_8x8_field;
582 pf->sub_4x4 = zigzag_sub_4x4_field;
584 if( cpu&X264_CPU_MMXEXT )
585 pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
589 if( cpu&X264_CPU_ALTIVEC )
590 pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
595 pf->scan_8x8 = zigzag_scan_8x8_frame;
596 pf->scan_4x4 = zigzag_scan_4x4_frame;
597 pf->sub_8x8 = zigzag_sub_8x8_frame;
598 pf->sub_4x4 = zigzag_sub_4x4_frame;
600 if( cpu&X264_CPU_MMX )
601 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
602 if( cpu&X264_CPU_MMXEXT )
603 pf->scan_8x8 = x264_zigzag_scan_8x8_frame_mmxext;
604 if( cpu&X264_CPU_SSE2_IS_FAST )
605 pf->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
606 if( cpu&X264_CPU_SSSE3 )
608 pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
609 pf->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
611 if( cpu&X264_CPU_PHADD_IS_FAST )
612 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
616 if( cpu&X264_CPU_ALTIVEC )
617 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
621 pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
623 if( cpu&X264_CPU_MMX )
624 pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;