1 /*****************************************************************************
2 * dct.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Squared quant weights, pre-permuted into zigzag order (one table per
 * frame/field scan); filled in by x264_dct_init_weights(). */
int x264_dct4_weight2_zigzag[2][16];
int x264_dct8_weight2_zigzag[2][64];
/*
 * XXX For all dct dc : input could be equal to output so ...
 */
/* 2x2 Hadamard transform of the chroma DC coefficients.
 * In-place: `d` is both input and output (hence the temporary). */
static void dct2x2dc( int16_t d[2][2] )
{
    int tmp[2][2];

    /* horizontal butterfly */
    tmp[0][0] = d[0][0] + d[0][1];
    tmp[1][0] = d[0][0] - d[0][1];
    tmp[0][1] = d[1][0] + d[1][1];
    tmp[1][1] = d[1][0] - d[1][1];

    /* vertical butterfly */
    d[0][0] = tmp[0][0] + tmp[0][1];
    d[1][0] = tmp[1][0] + tmp[1][1];
    d[0][1] = tmp[0][0] - tmp[0][1];
    d[1][1] = tmp[1][0] - tmp[1][1];
}
/* 4x4 Hadamard transform of the luma DC coefficients (16x16 intra).
 * In-place on `d`; the second pass includes the +1 rounding and >>1
 * normalization specified for the forward DC transform. */
static void dct4x4dc( int16_t d[4][4] )
{
    int16_t tmp[4][4];
    int s01, s23;
    int d01, d23;
    int i;

    /* transform rows, writing transposed into tmp */
    for( i = 0; i < 4; i++ )
    {
        s01 = d[i][0] + d[i][1];
        d01 = d[i][0] - d[i][1];
        s23 = d[i][2] + d[i][3];
        d23 = d[i][2] - d[i][3];

        tmp[0][i] = s01 + s23;
        tmp[1][i] = s01 - s23;
        tmp[2][i] = d01 - d23;
        tmp[3][i] = d01 + d23;
    }

    /* transform columns with rounding */
    for( i = 0; i < 4; i++ )
    {
        s01 = tmp[i][0] + tmp[i][1];
        d01 = tmp[i][0] - tmp[i][1];
        s23 = tmp[i][2] + tmp[i][3];
        d23 = tmp[i][2] - tmp[i][3];

        d[i][0] = ( s01 + s23 + 1 ) >> 1;
        d[i][1] = ( s01 - s23 + 1 ) >> 1;
        d[i][2] = ( d01 - d23 + 1 ) >> 1;
        d[i][3] = ( d01 + d23 + 1 ) >> 1;
    }
}
/* Inverse 4x4 Hadamard transform of the luma DC coefficients.
 * In-place on `d`; no rounding here — scaling is folded into dequant. */
static void idct4x4dc( int16_t d[4][4] )
{
    int16_t tmp[4][4];
    int s01, s23;
    int d01, d23;
    int i;

    /* transform rows, writing transposed into tmp */
    for( i = 0; i < 4; i++ )
    {
        s01 = d[i][0] + d[i][1];
        d01 = d[i][0] - d[i][1];
        s23 = d[i][2] + d[i][3];
        d23 = d[i][2] - d[i][3];

        tmp[0][i] = s01 + s23;
        tmp[1][i] = s01 - s23;
        tmp[2][i] = d01 - d23;
        tmp[3][i] = d01 + d23;
    }

    /* transform columns */
    for( i = 0; i < 4; i++ )
    {
        s01 = tmp[i][0] + tmp[i][1];
        d01 = tmp[i][0] - tmp[i][1];
        s23 = tmp[i][2] + tmp[i][3];
        d23 = tmp[i][2] - tmp[i][3];

        d[i][0] = s01 + s23;
        d[i][1] = s01 - s23;
        d[i][2] = d01 - d23;
        d[i][3] = d01 + d23;
    }
}
/* Compute the i_size x i_size residual pix1 - pix2 into the flat array
 * `diff` (row-major). i_pix1/i_pix2 are the source strides in bytes. */
static inline void pixel_sub_wxh( int16_t *diff, int i_size,
                                  uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int y, x;
    for( y = 0; y < i_size; y++ )
    {
        for( x = 0; x < i_size; x++ )
        {
            diff[x + y*i_size] = pix1[x] - pix2[x];
        }
        /* advance both sources by one row */
        pix1 += i_pix1;
        pix2 += i_pix2;
    }
}
137 static void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, uint8_t *pix2 )
143 pixel_sub_wxh( (int16_t*)d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
145 for( i = 0; i < 4; i++ )
147 const int s03 = d[i][0] + d[i][3];
148 const int s12 = d[i][1] + d[i][2];
149 const int d03 = d[i][0] - d[i][3];
150 const int d12 = d[i][1] - d[i][2];
152 tmp[0][i] = s03 + s12;
153 tmp[1][i] = 2*d03 + d12;
154 tmp[2][i] = s03 - s12;
155 tmp[3][i] = d03 - 2*d12;
158 for( i = 0; i < 4; i++ )
160 const int s03 = tmp[i][0] + tmp[i][3];
161 const int s12 = tmp[i][1] + tmp[i][2];
162 const int d03 = tmp[i][0] - tmp[i][3];
163 const int d12 = tmp[i][1] - tmp[i][2];
165 dct[i][0] = s03 + s12;
166 dct[i][1] = 2*d03 + d12;
167 dct[i][2] = s03 - s12;
168 dct[i][3] = d03 - 2*d12;
172 static void sub8x8_dct( int16_t dct[4][4][4], uint8_t *pix1, uint8_t *pix2 )
174 sub4x4_dct( dct[0], &pix1[0], &pix2[0] );
175 sub4x4_dct( dct[1], &pix1[4], &pix2[4] );
176 sub4x4_dct( dct[2], &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] );
177 sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
180 static void sub16x16_dct( int16_t dct[16][4][4], uint8_t *pix1, uint8_t *pix2 )
182 sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] );
183 sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] );
184 sub8x8_dct( &dct[ 8], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
185 sub8x8_dct( &dct[12], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
189 static void add4x4_idct( uint8_t *p_dst, int16_t dct[4][4] )
196 for( i = 0; i < 4; i++ )
198 const int s02 = dct[0][i] + dct[2][i];
199 const int d02 = dct[0][i] - dct[2][i];
200 const int s13 = dct[1][i] + (dct[3][i]>>1);
201 const int d13 = (dct[1][i]>>1) - dct[3][i];
203 tmp[i][0] = s02 + s13;
204 tmp[i][1] = d02 + d13;
205 tmp[i][2] = d02 - d13;
206 tmp[i][3] = s02 - s13;
209 for( i = 0; i < 4; i++ )
211 const int s02 = tmp[0][i] + tmp[2][i];
212 const int d02 = tmp[0][i] - tmp[2][i];
213 const int s13 = tmp[1][i] + (tmp[3][i]>>1);
214 const int d13 = (tmp[1][i]>>1) - tmp[3][i];
216 d[0][i] = ( s02 + s13 + 32 ) >> 6;
217 d[1][i] = ( d02 + d13 + 32 ) >> 6;
218 d[2][i] = ( d02 - d13 + 32 ) >> 6;
219 d[3][i] = ( s02 - s13 + 32 ) >> 6;
223 for( y = 0; y < 4; y++ )
225 for( x = 0; x < 4; x++ )
227 p_dst[x] = x264_clip_uint8( p_dst[x] + d[y][x] );
229 p_dst += FDEC_STRIDE;
233 static void add8x8_idct( uint8_t *p_dst, int16_t dct[4][4][4] )
235 add4x4_idct( &p_dst[0], dct[0] );
236 add4x4_idct( &p_dst[4], dct[1] );
237 add4x4_idct( &p_dst[4*FDEC_STRIDE+0], dct[2] );
238 add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] );
241 static void add16x16_idct( uint8_t *p_dst, int16_t dct[16][4][4] )
243 add8x8_idct( &p_dst[0], &dct[0] );
244 add8x8_idct( &p_dst[8], &dct[4] );
245 add8x8_idct( &p_dst[8*FDEC_STRIDE+0], &dct[8] );
246 add8x8_idct( &p_dst[8*FDEC_STRIDE+8], &dct[12] );
/****************************************************************************
 * 8x8 transform:
 ****************************************************************************/
/* One 1-D pass of the forward 8x8 integer DCT. Reads via SRC(x) and
 * writes via DST(x); callers define those macros to select row/column
 * orientation before each pass. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0) =  a0 + a1     ;\
    DST(1) =  a4 + (a7>>2);\
    DST(2) =  a2 + (a3>>1);\
    DST(3) =  a5 + (a6>>2);\
    DST(4) =  a0 - a1     ;\
    DST(5) =  a6 - (a5>>2);\
    DST(6) = (a2>>1) - a3 ;\
    DST(7) = (a4>>2) - a7 ;\
}
280 static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
285 pixel_sub_wxh( (int16_t*)tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
287 #define SRC(x) tmp[x][i]
288 #define DST(x) tmp[x][i]
289 for( i = 0; i < 8; i++ )
294 #define SRC(x) tmp[i][x]
295 #define DST(x) dct[x][i]
296 for( i = 0; i < 8; i++ )
302 static void sub16x16_dct8( int16_t dct[4][8][8], uint8_t *pix1, uint8_t *pix2 )
304 sub8x8_dct8( dct[0], &pix1[0], &pix2[0] );
305 sub8x8_dct8( dct[1], &pix1[8], &pix2[8] );
306 sub8x8_dct8( dct[2], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
307 sub8x8_dct8( dct[3], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
/* One 1-D pass of the inverse 8x8 integer DCT. Reads via SRC(x) and
 * stores via DST(x,rhs); callers define those macros per pass. */
#define IDCT8_1D {\
    const int a0 =  SRC(0) + SRC(4);\
    const int a2 =  SRC(0) - SRC(4);\
    const int a4 = (SRC(2)>>1) - SRC(6);\
    const int a6 = (SRC(6)>>1) + SRC(2);\
    const int b0 = a0 + a6;\
    const int b2 = a2 + a4;\
    const int b4 = a2 - a4;\
    const int b6 = a0 - a6;\
    const int a1 = -SRC(3) + SRC(5) - SRC(7) - (SRC(7)>>1);\
    const int a3 =  SRC(1) + SRC(7) - SRC(3) - (SRC(3)>>1);\
    const int a5 = -SRC(1) + SRC(7) + SRC(5) + (SRC(5)>>1);\
    const int a7 =  SRC(3) + SRC(5) + SRC(1) + (SRC(1)>>1);\
    const int b1 = (a7>>2) + a1;\
    const int b3 =  a3 + (a5>>2);\
    const int b5 = (a3>>2) - a5;\
    const int b7 =  a7 - (a1>>2);\
    DST(0, b0 + b7);\
    DST(1, b2 + b5);\
    DST(2, b4 + b3);\
    DST(3, b6 + b1);\
    DST(4, b6 - b1);\
    DST(5, b4 - b3);\
    DST(6, b2 - b5);\
    DST(7, b0 - b7);\
}
337 static void add8x8_idct8( uint8_t *dst, int16_t dct[8][8] )
341 dct[0][0] += 32; // rounding for the >>6 at the end
343 #define SRC(x) dct[x][i]
344 #define DST(x,rhs) dct[x][i] = (rhs)
345 for( i = 0; i < 8; i++ )
350 #define SRC(x) dct[i][x]
351 #define DST(x,rhs) dst[i + x*FDEC_STRIDE] = x264_clip_uint8( dst[i + x*FDEC_STRIDE] + ((rhs) >> 6) );
352 for( i = 0; i < 8; i++ )
358 static void add16x16_idct8( uint8_t *dst, int16_t dct[4][8][8] )
360 add8x8_idct8( &dst[0], dct[0] );
361 add8x8_idct8( &dst[8], dct[1] );
362 add8x8_idct8( &dst[8*FDEC_STRIDE+0], dct[2] );
363 add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
/****************************************************************************
 * x264_dct_init:
 ****************************************************************************/
370 void x264_dct_init( int cpu, x264_dct_function_t *dctf )
372 dctf->sub4x4_dct = sub4x4_dct;
373 dctf->add4x4_idct = add4x4_idct;
375 dctf->sub8x8_dct = sub8x8_dct;
376 dctf->add8x8_idct = add8x8_idct;
378 dctf->sub16x16_dct = sub16x16_dct;
379 dctf->add16x16_idct = add16x16_idct;
381 dctf->sub8x8_dct8 = sub8x8_dct8;
382 dctf->add8x8_idct8 = add8x8_idct8;
384 dctf->sub16x16_dct8 = sub16x16_dct8;
385 dctf->add16x16_idct8 = add16x16_idct8;
387 dctf->dct4x4dc = dct4x4dc;
388 dctf->idct4x4dc = idct4x4dc;
390 dctf->dct2x2dc = dct2x2dc;
391 dctf->idct2x2dc = dct2x2dc;
394 if( cpu&X264_CPU_MMX )
396 dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
397 dctf->add4x4_idct = x264_add4x4_idct_mmx;
398 dctf->dct4x4dc = x264_dct4x4dc_mmx;
399 dctf->idct4x4dc = x264_idct4x4dc_mmx;
402 dctf->sub8x8_dct = x264_sub8x8_dct_mmx;
403 dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
404 dctf->add8x8_idct = x264_add8x8_idct_mmx;
405 dctf->add16x16_idct = x264_add16x16_idct_mmx;
407 dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
408 dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
409 dctf->add8x8_idct8 = x264_add8x8_idct8_mmx;
410 dctf->add16x16_idct8= x264_add16x16_idct8_mmx;
414 if( cpu&X264_CPU_SSE2 )
416 dctf->sub8x8_dct8 = x264_sub8x8_dct8_sse2;
417 dctf->sub16x16_dct8 = x264_sub16x16_dct8_sse2;
418 dctf->add8x8_idct8 = x264_add8x8_idct8_sse2;
419 dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
421 dctf->sub8x8_dct = x264_sub8x8_dct_sse2;
422 dctf->sub16x16_dct = x264_sub16x16_dct_sse2;
423 dctf->add8x8_idct = x264_add8x8_idct_sse2;
424 dctf->add16x16_idct = x264_add16x16_idct_sse2;
429 if( cpu&X264_CPU_ALTIVEC )
431 dctf->sub4x4_dct = x264_sub4x4_dct_altivec;
432 dctf->sub8x8_dct = x264_sub8x8_dct_altivec;
433 dctf->sub16x16_dct = x264_sub16x16_dct_altivec;
435 dctf->add4x4_idct = x264_add4x4_idct_altivec;
436 dctf->add8x8_idct = x264_add8x8_idct_altivec;
437 dctf->add16x16_idct = x264_add16x16_idct_altivec;
439 dctf->sub8x8_dct8 = x264_sub8x8_dct8_altivec;
440 dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
442 dctf->add8x8_idct8 = x264_add8x8_idct8_altivec;
443 dctf->add16x16_idct8= x264_add16x16_idct8_altivec;
448 void x264_dct_init_weights( void )
453 for( i=0; i<16; i++ )
454 x264_dct4_weight2_zigzag[j][i] = x264_dct4_weight2_tab[ x264_zigzag_scan4[j][i] ];
455 for( i=0; i<64; i++ )
456 x264_dct8_weight2_zigzag[j][i] = x264_dct8_weight2_tab[ x264_zigzag_scan8[j][i] ];
// gcc pessimizes multi-dimensional arrays here, even with constant indices
/* read dct (transposed: x,y swapped) through a flat pointer */
#define ZIG(i,y,x) level[i] = dct[0][x*8+y];
464 static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
466 ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
467 ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
468 ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
469 ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
470 ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
471 ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
472 ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
473 ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
474 ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
475 ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
476 ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
477 ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
478 ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
479 ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
480 ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
481 ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
484 static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
486 ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)
487 ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)
488 ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)
489 ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)
490 ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)
491 ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)
492 ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)
493 ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)
494 ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)
495 ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)
496 ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)
497 ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)
498 ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)
499 ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)
500 ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)
501 ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
505 #define ZIG(i,y,x) level[i] = dct[0][x*4+y];
507 static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
509 ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
510 ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
511 ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
512 ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
515 static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
517 *(uint32_t*)level = *(uint32_t*)dct;
518 ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
519 *(uint32_t*)(level+6) = *(uint32_t*)(*dct+6);
520 *(uint64_t*)(level+8) = *(uint64_t*)(*dct+8);
521 *(uint64_t*)(level+12) = *(uint64_t*)(*dct+12);
/* ZIG for the sub variants: compute the residual src - dst directly
 * while scanning. COPY4x4 then updates the reconstruction buffer with
 * the source pixels (zero residual after subtraction). */
#undef ZIG
#define ZIG(i,y,x) {\
    int oe = x+y*FENC_STRIDE;\
    int od = x+y*FDEC_STRIDE;\
    level[i] = p_src[oe] - p_dst[od];\
}
#define COPY4x4\
    *(uint32_t*)(p_dst+0*FDEC_STRIDE) = *(uint32_t*)(p_src+0*FENC_STRIDE);\
    *(uint32_t*)(p_dst+1*FDEC_STRIDE) = *(uint32_t*)(p_src+1*FENC_STRIDE);\
    *(uint32_t*)(p_dst+2*FDEC_STRIDE) = *(uint32_t*)(p_src+2*FENC_STRIDE);\
    *(uint32_t*)(p_dst+3*FDEC_STRIDE) = *(uint32_t*)(p_src+3*FENC_STRIDE);
536 static void zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
538 ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
539 ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
540 ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
541 ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
545 static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
547 ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)
548 ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)
549 ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)
550 ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
557 void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
561 pf->scan_8x8 = zigzag_scan_8x8_field;
562 pf->scan_4x4 = zigzag_scan_4x4_field;
563 pf->sub_4x4 = zigzag_sub_4x4_field;
565 if( cpu&X264_CPU_MMXEXT )
566 pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
570 if( cpu&X264_CPU_ALTIVEC )
571 pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
576 pf->scan_8x8 = zigzag_scan_8x8_frame;
577 pf->scan_4x4 = zigzag_scan_4x4_frame;
578 pf->sub_4x4 = zigzag_sub_4x4_frame;
580 if( cpu&X264_CPU_SSSE3 )
581 pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
585 if( cpu&X264_CPU_ALTIVEC )
586 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;