4 #include "common/common.h"
5 #include "common/cpu.h"
7 #include "common/i386/pixel.h"
8 #include "common/i386/dct.h"
9 #include "common/i386/mc.h"
12 #include "common/ppc/pixel.h"
13 #include "common/ppc/mc.h"
16 /* buf1, buf2: initialised to random data; the tests must not write into them */
17 uint8_t * buf1, * buf2;
18 /* buf3, buf4: used to store output */
19 uint8_t * buf3, * buf4;
/* Print a pass/fail line for one group of tests.  Reads the local `ok`
 * flag from the enclosing check_* function.
 * NOTE(review): the macro body appears truncated in this extract (original
 * lines are missing); in the full file it presumably also consults
 * `used_asm` to distinguish "OK" from "not tested" -- confirm. */
23 #define report( name ) { \
25 fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \
/* Verify the optimized pixel-comparison functions (sad, sad_x3/x4, intra
 * satd/sa8d, ssim, esa ads) against the plain-C reference implementations.
 * cpu_ref/cpu_new are CPU-capability flag sets; only functions whose
 * pointer differs between the two inits are (re)tested.
 * Returns nonzero if any comparison failed (accumulated in `ret`).
 * NOTE(review): several original lines (braces, loop headers, `return ret`)
 * are missing from this extract; do not re-indent or restructure blindly. */
29 static int check_pixel( int cpu_ref, int cpu_new )
31 x264_pixel_function_t pixel_c;
32 x264_pixel_function_t pixel_ref;
33 x264_pixel_function_t pixel_asm;
34 x264_predict_t predict_16x16[4+3];
35 x264_predict_t predict_8x8c[4+3];
36 x264_predict_t predict_4x4[9+3];
37 x264_predict8x8_t predict_8x8[9+3];
/* edge: 33-byte neighbor buffer consumed by the 8x8 intra predictors */
38 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
39 int ret = 0, ok, used_asm;
42 x264_pixel_init( 0, &pixel_c );
43 x264_pixel_init( cpu_ref, &pixel_ref );
44 x264_pixel_init( cpu_new, &pixel_asm );
45 x264_predict_16x16_init( 0, predict_16x16 );
46 x264_predict_8x8c_init( 0, predict_8x8c );
47 x264_predict_8x8_init( 0, predict_8x8 );
48 x264_predict_4x4_init( 0, predict_4x4 );
49 x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* Run one family of pixel-compare functions (all 7 block sizes) on 64
 * shifted copies of buf2 to exercise every source alignment. */
51 #define TEST_PIXEL( name ) \
52 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
55 if( pixel_asm.name[i] != pixel_ref.name[i] ) \
57 for( j=0; j<64; j++ ) \
60 res_c = pixel_c.name[i]( buf1, 32, buf2+j, 16 ); \
61 res_asm = pixel_asm.name[i]( buf1, 32, buf2+j, 16 ); \
62 if( res_c != res_asm ) \
65 fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
71 report( "pixel " #name " :" );
/* Check the multi-reference SAD (sad_x3 / sad_x4, selected via N) against
 * N independent calls to the scalar C sad.  The four candidate pointers
 * (pix2, +30, +1, +99) deliberately mix alignments.
 * NOTE(review): the if(N==4)/else lines selecting between the sad_x4 and
 * sad_x3 calls are elided in this extract -- confirm against full file. */
78 #define TEST_PIXEL_X( N ) \
79 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
81 int res_c[4]={0}, res_asm[4]={0}; \
82 if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
84 for( j=0; j<64; j++) \
86 uint8_t *pix2 = buf2+j; \
88 res_c[0] = pixel_c.sad[i]( buf1, 16, pix2, 32 ); \
89 res_c[1] = pixel_c.sad[i]( buf1, 16, pix2+30, 32 ); \
90 res_c[2] = pixel_c.sad[i]( buf1, 16, pix2+1, 32 ); \
93 res_c[3] = pixel_c.sad[i]( buf1, 16, pix2+99, 32 ); \
94 pixel_asm.sad_x4[i]( buf1, pix2, pix2+30, pix2+1, pix2+99, 32, res_asm ); \
97 pixel_asm.sad_x3[i]( buf1, pix2, pix2+30, pix2+1, 32, res_asm ); \
98 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
101 fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
102 i, res_c[0], res_c[1], res_c[2], res_c[3], \
103 res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
108 report( "pixel sad_x"#N" :" );
/* Check an intra_*_x3 helper: run the 3 relevant C predictors into buf3,
 * score each with the scalar satd/sa8d, and compare against the fused asm
 * version.  i8x8 selects the edge-buffer calling convention used by the
 * 8x8 predictors; extra predictor args come in via __VA_ARGS__. */
113 #define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \
114 if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
116 int res_c[3], res_asm[3]; \
118 memcpy( buf3, buf2, 1024 ); \
119 for( i=0; i<3; i++ ) \
121 pred[i]( buf3+40, ##__VA_ARGS__ ); \
122 res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
124 pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
125 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
128 fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
129 res_c[0], res_c[1], res_c[2], \
130 res_asm[0], res_asm[1], res_asm[2] ); \
134 ok = 1; used_asm = 0;
135 TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );
136 TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );
137 TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );
138 TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );
139 report( "intra satd_x3 :" );
/* SSIM: compare whole-plane results through the wxh driver; the asm path
 * is reached via the function pointers inside pixel_asm. */
141 if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||
142 pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
146 x264_cpu_restore( cpu_new );
147 res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
148 res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
/* floating-point result: allow a tiny absolute tolerance */
149 if( fabs(res_c - res_a) > 1e-8 )
152 fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
/* Exhaustive-search ADS helpers: feed random 14-bit sums/dc values and
 * require bit-exact agreement between C and asm outputs.
 * NOTE(review): the surrounding for-loop over i and the sums/dc
 * declarations are elided in this extract. */
157 ok = 1; used_asm = 0;
159 if( pixel_asm.ads[i] != pixel_ref.ads[i] )
161 uint16_t res_a[32], res_c[32];
164 for( j=0; j<72; j++ )
165 sums[j] = rand() & 0x3fff;
167 dc[j] = rand() & 0x3fff;
169 pixel_c.ads[i]( dc, sums, 32, res_c, 32 );
170 pixel_asm.ads[i]( dc, sums, 32, res_a, 32 );
171 if( memcmp(res_a, res_c, sizeof(res_c)) )
174 report( "esa ads:" );
/* Verify the optimized (i)dct functions against the C reference versions.
 * Only function pointers that differ between the cpu_ref and cpu_new inits
 * are tested.  Returns nonzero on any mismatch (accumulated in `ret`).
 * NOTE(review): several original lines (braces, `return ret`) are missing
 * from this extract. */
179 static int check_dct( int cpu_ref, int cpu_new )
181 x264_dct_function_t dct_c;
182 x264_dct_function_t dct_ref;
183 x264_dct_function_t dct_asm;
184 int ret = 0, ok, used_asm;
/* 16-byte alignment required by the SSE2 dct implementations */
185 int16_t dct1[16][4][4] __attribute__((aligned(16)));
186 int16_t dct2[16][4][4] __attribute__((aligned(16)));
188 x264_dct_init( 0, &dct_c );
189 x264_dct_init( cpu_ref, &dct_ref);
190 x264_dct_init( cpu_new, &dct_asm );
/* Run one forward-dct variant through C and asm and require bit-exact
 * agreement on the first `size` bytes of coefficients. */
191 #define TEST_DCT( name, t1, t2, size ) \
192 if( dct_asm.name != dct_ref.name ) \
195 dct_c.name( t1, buf1, buf2 ); \
196 dct_asm.name( t2, buf1, buf2 ); \
197 if( memcmp( t1, t2, size ) ) \
200 fprintf( stderr, #name " [FAILED]\n" ); \
203 ok = 1; used_asm = 0;
204 TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
205 TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
206 TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
207 report( "sub_dct4 :" );
209 ok = 1; used_asm = 0;
210 TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
211 TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
212 report( "sub_dct8 :" );
215 /* copy coefs because idct8 modifies them in place */
216 memcpy( buf5, dct1, 512 );
/* Run one inverse-dct variant on identical pixel/coef inputs through C and
 * asm and require bit-exact agreement on the reconstructed pixels. */
218 #define TEST_IDCT( name ) \
219 if( dct_asm.name != dct_ref.name ) \
222 memcpy( buf3, buf1, 32*32 ); \
223 memcpy( buf4, buf1, 32*32 ); \
224 memcpy( dct1, buf5, 512 ); \
225 memcpy( dct2, buf5, 512 ); \
226 dct_c.name( buf3, (void*)dct1 ); \
227 dct_asm.name( buf4, (void*)dct2 ); \
228 if( memcmp( buf3, buf4, 32*32 ) ) \
231 fprintf( stderr, #name " [FAILED]\n" ); \
234 ok = 1; used_asm = 0;
235 TEST_IDCT( add4x4_idct );
236 TEST_IDCT( add8x8_idct );
237 TEST_IDCT( add16x16_idct );
238 report( "add_idct4 :" );
240 ok = 1; used_asm = 0;
241 TEST_IDCT( add8x8_idct8 );
242 TEST_IDCT( add16x16_idct8 );
243 report( "add_idct8 :" );
/* DC-only transforms are tested on fixed inputs (local dct1/dct2 arrays
 * shadow the outer ones). */
246 ok = 1; used_asm = 0;
247 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
249 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
250 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
252 dct_c.dct4x4dc( dct1 );
253 dct_asm.dct4x4dc( dct2 );
254 if( memcmp( dct1, dct2, 32 ) )
257 fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
/* BUGFIX: this guard previously compared dct4x4dc (copy-paste from the
 * block above), silently skipping the idct4x4dc test whenever the forward
 * transform pointers matched.  The body and failure message test
 * idct4x4dc, matching the dct2x2dc/idct2x2dc pair below. */
260 if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
262 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
263 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
265 dct_c.idct4x4dc( dct1 );
266 dct_asm.idct4x4dc( dct2 );
267 if( memcmp( dct1, dct2, 32 ) )
270 fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
273 report( "(i)dct4x4dc :" );
275 ok = 1; used_asm = 0;
276 if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
278 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
279 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
281 dct_c.dct2x2dc( dct1 );
282 dct_asm.dct2x2dc( dct2 );
283 if( memcmp( dct1, dct2, 4*2 ) )
286 fprintf( stderr, " - dct2x2dc : [FAILED]\n" );
289 if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
291 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
292 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
294 dct_c.idct2x2dc( dct1 );
295 dct_asm.idct2x2dc( dct2 );
296 if( memcmp( dct1, dct2, 4*2 ) )
299 fprintf( stderr, " - idct2x2dc : [FAILED]\n" );
302 report( "(i)dct2x2dc :" );
/* Verify the optimized motion-compensation functions (mc_luma, get_ref,
 * mc_chroma, pixel averaging / weighted prediction) against the C
 * reference versions, sweeping motion vectors and block sizes.
 * Returns nonzero on any mismatch (accumulated in `ret`).
 * NOTE(review): several original lines (braces, loop variable decls,
 * `return ret`) are missing from this extract. */
307 static int check_mc( int cpu_ref, int cpu_new )
309 x264_mc_functions_t mc_c;
310 x264_mc_functions_t mc_ref;
311 x264_mc_functions_t mc_a;
/* scalar pixel functions, used only to compare get_ref output via SAD */
312 x264_pixel_function_t pixel;
/* source/reference pointers offset into buf1 so negative mvs stay in-bounds */
314 uint8_t *src = &buf1[2*32+2];
315 uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2],
316 &buf1[12*32+2], &buf1[17*32+2] };
317 uint8_t *dst1 = &buf3[2*32+2];
318 uint8_t *dst2 = &buf4[2*32+2];
321 int ret = 0, ok, used_asm;
323 x264_mc_init( 0, &mc_c );
324 x264_mc_init( cpu_ref, &mc_ref );
325 x264_mc_init( cpu_new, &mc_a );
326 x264_pixel_init( 0, &pixel );
/* Interpolate one wxh luma block at mv (dx,dy) with both C and asm and
 * require the whole output buffers to match (0xCD poison catches
 * out-of-block writes).  Also checks get_ref, whose result may alias the
 * reference plane, by SAD against the C mc_luma output. */
328 #define MC_TEST_LUMA( w, h ) \
329 if( mc_a.mc_luma != mc_ref.mc_luma ) \
332 memset(buf3, 0xCD, 1024); \
333 memset(buf4, 0xCD, 1024); \
334 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
335 mc_a.mc_luma( dst2, 16, src2, 32, dx, dy, w, h ); \
336 if( memcmp( buf3, buf4, 1024 ) ) \
338 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
342 if( mc_a.get_ref != mc_ref.get_ref ) \
344 uint8_t *ref = dst2; \
345 int ref_stride = 16; \
347 memset(buf3, 0xCD, 1024); \
348 memset(buf4, 0xCD, 1024); \
349 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
350 ref = mc_a.get_ref( ref, &ref_stride, src2, 32, dx, dy, w, h ); \
351 if( pixel.sad[PIXEL_##w##x##h]( dst1, 16, ref, ref_stride ) ) \
353 fprintf( stderr, "get_ref[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
358 #define MC_TEST_CHROMA( w, h ) \
359 if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
362 memset(buf3, 0xCD, 1024); \
363 memset(buf4, 0xCD, 1024); \
364 mc_c.mc_chroma( dst1, 16, src, 32, dx, dy, w, h ); \
365 mc_a.mc_chroma( dst2, 16, src, 32, dx, dy, w, h ); \
366 /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
367 for( j=0; j<h; j++ ) \
368 for( i=w; i<4; i++ ) \
369 dst2[i+j*16] = dst1[i+j*16]; \
370 if( memcmp( buf3, buf4, 1024 ) ) \
372 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
/* luma: sweep quarter-pel mvs in [-8,8) over all 7 partition sizes */
376 ok = 1; used_asm = 0;
377 for( dy = -8; dy < 8; dy++ )
378 for( dx = -8; dx < 8; dx++ )
380 MC_TEST_LUMA( 16, 16 );
381 MC_TEST_LUMA( 16, 8 );
382 MC_TEST_LUMA( 8, 16 );
383 MC_TEST_LUMA( 8, 8 );
384 MC_TEST_LUMA( 8, 4 );
385 MC_TEST_LUMA( 4, 8 );
386 MC_TEST_LUMA( 4, 4 );
388 report( "mc luma :" );
/* chroma: eighth-pel mvs in [-1,9) over all chroma block sizes */
390 ok = 1; used_asm = 0;
391 for( dy = -1; dy < 9; dy++ )
392 for( dx = -1; dx < 9; dx++ )
394 MC_TEST_CHROMA( 8, 8 );
395 MC_TEST_CHROMA( 8, 4 );
396 MC_TEST_CHROMA( 4, 8 );
397 MC_TEST_CHROMA( 4, 4 );
398 MC_TEST_CHROMA( 4, 2 );
399 MC_TEST_CHROMA( 2, 4 );
400 MC_TEST_CHROMA( 2, 2 );
402 report( "mc chroma :" );
404 #undef MC_TEST_CHROMA
/* Compare one averaging-function family (all 10 partition sizes); extra
 * args (e.g. the weight for avg_weight) pass through __VA_ARGS__. */
406 #define MC_TEST_AVG( name, ... ) \
407 for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \
409 memcpy( buf3, buf1, 1024 ); \
410 memcpy( buf4, buf1, 1024 ); \
411 if( mc_a.name[i] != mc_ref.name[i] ) \
414 mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
415 mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
416 if( memcmp( buf3, buf4, 1024 ) ) \
419 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
424 report( "mc avg :" );
/* weighted prediction: sweep weights including out-of-range values */
425 ok = 1; used_asm = 0;
426 for( w = -64; w <= 128 && ok; w++ )
427 MC_TEST_AVG( avg_weight, w );
428 report( "mc wpredb :" );
/* Verify the optimized deblocking filters against the C reference
 * versions across 36 (alpha,beta) strength levels and two input
 * distributions.  Returns nonzero on any mismatch.
 * NOTE(review): the alphas/betas fill and the tcs declaration are partly
 * elided in this extract. */
433 static int check_deblock( int cpu_ref, int cpu_new )
435 x264_deblock_function_t db_c;
436 x264_deblock_function_t db_ref;
437 x264_deblock_function_t db_a;
438 int ret = 0, ok = 1, used_asm = 0;
439 int alphas[36], betas[36];
443 x264_deblock_init( 0, &db_c );
444 x264_deblock_init( cpu_ref, &db_ref );
445 x264_deblock_init( cpu_new, &db_a );
447 /* not exactly the real values of a,b,tc but close enough */
449 for( i = 35; i >= 0; i-- )
453 tcs[i][0] = tcs[i][2] = (c+6)/10;
454 tcs[i][1] = tcs[i][3] = (c+9)/20;
/* Run one deblock variant on identical random input through C and asm and
 * require the whole 1K buffer to match (catches stray writes too);
 * per-edge tc values pass through __VA_ARGS__. */
459 #define TEST_DEBLOCK( name, ... ) \
460 for( i = 0; i < 36; i++ ) \
462 for( j = 0; j < 1024; j++ ) \
463 /* two distributions of random to exercise different failure modes */\
464 buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
465 memcpy( buf3, buf1, 1024 ); \
466 memcpy( buf4, buf1, 1024 ); \
467 if( db_a.name != db_ref.name ) \
470 db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
471 db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
472 if( memcmp( buf3, buf4, 1024 ) ) \
475 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
481 TEST_DEBLOCK( deblock_h_luma, tcs[i] );
482 TEST_DEBLOCK( deblock_v_luma, tcs[i] );
483 TEST_DEBLOCK( deblock_h_chroma, tcs[i] );
484 TEST_DEBLOCK( deblock_v_chroma, tcs[i] );
485 TEST_DEBLOCK( deblock_h_luma_intra );
486 TEST_DEBLOCK( deblock_v_luma_intra );
487 TEST_DEBLOCK( deblock_h_chroma_intra );
488 TEST_DEBLOCK( deblock_v_chroma_intra );
490 report( "deblock :" );
/* Verify the optimized quant/dequant functions against the C reference
 * versions, under four quantization-matrix setups (flat, JVT defaults,
 * and random CQMs) and all qp levels 51..1.
 * Returns nonzero on any mismatch.
 * NOTE(review): the `h` (x264_t) declaration/setup, several loop headers
 * and closing braces, and the quant report are elided in this extract. */
497 static int check_quant( int cpu_ref, int cpu_new )
498 x264_quant_function_t qf_c;
499 x264_quant_function_t qf_ref;
500 x264_quant_function_t qf_a;
501 int16_t dct1[64] __attribute__((__aligned__(16)));
502 int16_t dct2[64] __attribute__((__aligned__(16)));
503 uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
504 int ret = 0, ok, used_asm;
/* separate pass/fail state for the quant and dequant report lines */
505 int oks[2] = {1,1}, used_asms[2] = {0,0};
508 h->pps = h->pps_array;
509 x264_param_default( &h->param );
510 h->param.rc.i_qp_min = 26;
/* i_cqm selects the scaling-list setup: 0 = flat16, 1 = JVT defaults,
 * 2..3 = random matrices (values 10..255) */
512 for( i_cqm = 0; i_cqm < 4; i_cqm++ )
515 for( i = 0; i < 6; i++ )
516 h->pps->scaling_list[i] = x264_cqm_flat16;
517 else if( i_cqm == 1 )
518 for( i = 0; i < 6; i++ )
519 h->pps->scaling_list[i] = x264_cqm_jvt[i];
523 for( i = 0; i < 64; i++ )
524 cqm_buf[i] = 10 + rand() % 246;
526 for( i = 0; i < 64; i++ )
528 for( i = 0; i < 6; i++ )
529 h->pps->scaling_list[i] = cqm_buf;
/* rebuild the derived quant tables for the selected scaling lists */
533 x264_quant_init( h, 0, &qf_c );
534 x264_quant_init( h, cpu_ref, &qf_ref );
535 x264_quant_init( h, cpu_new, &qf_a );
/* INIT_QUANT8/4: fill dct1==dct2 with random coefficients scaled like
 * real (i)dct output, per-position via the scale1d tables.
 * TEST_QUANT_DC / TEST_QUANT: run one quant variant through C and asm at
 * every qp and require bit-exact coefficients.
 * TEST_DEQUANT: quantize with the C path, then dequantize with C vs asm.
 * NOTE(review): TEST_QUANT_DC ignores its `cqm` parameter and hardcodes
 * CQM_4IY tables in the visible lines -- confirm against the full file. */
537 #define INIT_QUANT8() \
539 static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \
541 for( y = 0; y < 8; y++ ) \
542 for( x = 0; x < 8; x++ ) \
544 unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \
545 dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \
549 #define INIT_QUANT4() \
551 static const int scale1d[4] = {4,6,4,6}; \
553 for( y = 0; y < 4; y++ ) \
554 for( x = 0; x < 4; x++ ) \
556 unsigned int scale = 255*scale1d[y]*scale1d[x]; \
557 dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \
561 #define TEST_QUANT_DC( name, cqm ) \
562 if( qf_a.name != qf_ref.name ) \
565 for( qp = 51; qp > 0; qp-- ) \
567 for( i = 0; i < 16; i++ ) \
568 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
569 qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
570 qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
571 if( memcmp( dct1, dct2, 16*2 ) ) \
574 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
580 #define TEST_QUANT( qname, block, w ) \
581 if( qf_a.qname != qf_ref.qname ) \
584 for( qp = 51; qp > 0; qp-- ) \
587 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
588 qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
589 if( memcmp( dct1, dct2, w*w*2 ) ) \
592 fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
598 TEST_QUANT( quant_8x8, CQM_8IY, 8 );
599 TEST_QUANT( quant_8x8, CQM_8PY, 8 );
600 TEST_QUANT( quant_4x4, CQM_4IY, 4 );
601 TEST_QUANT( quant_4x4, CQM_4PY, 4 );
602 TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] );
603 TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );
/* dequant is checked on C-quantized input so values are representative */
605 #define TEST_DEQUANT( qname, dqname, block, w ) \
606 if( qf_a.dqname != qf_ref.dqname ) \
609 for( qp = 51; qp > 0; qp-- ) \
612 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
613 memcpy( dct2, dct1, w*w*2 ); \
614 qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
615 qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
616 if( memcmp( dct1, dct2, w*w*2 ) ) \
619 fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
625 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 );
626 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 );
627 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
628 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
/* report quant results (oks[0]) and dequant results (oks[1]) separately */
631 ok = oks[0]; used_asm = used_asms[0];
634 ok = oks[1]; used_asm = used_asms[1];
635 report( "dequant :" );
/* Verify every optimized intra-prediction function against the C
 * reference version, comparing the full 32x20 output region so stray
 * writes outside the predicted block are caught too.
 * Returns nonzero on any mismatch.
 * NOTE(review): several original lines (braces, debug-dump loop headers,
 * `return ret`) are missing from this extract. */
642 static int check_intra( int cpu_ref, int cpu_new )
643 int ret = 0, ok = 1, used_asm = 0;
/* edge: 33-byte filtered-neighbor buffer for the 8x8 predictors */
644 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
647 x264_predict_t predict_16x16[4+3];
648 x264_predict_t predict_8x8c[4+3];
649 x264_predict8x8_t predict_8x8[9+3];
650 x264_predict_t predict_4x4[9+3];
651 } ip_c, ip_ref, ip_a;
/* three tables: plain C, reference cpu flags, and the flags under test */
653 x264_predict_16x16_init( 0, ip_c.predict_16x16 );
654 x264_predict_8x8c_init( 0, ip_c.predict_8x8c );
655 x264_predict_8x8_init( 0, ip_c.predict_8x8 );
656 x264_predict_4x4_init( 0, ip_c.predict_4x4 );
658 x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 );
659 x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c );
660 x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 );
661 x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 );
663 x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 );
664 x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c );
665 x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 );
666 x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
668 x264_predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* Run predictor `dir` of one family through C and asm on identical
 * neighbor data; on mismatch, dump the edge pixels and both outputs for
 * debugging.  Extra args (the edge buffer) pass through __VA_ARGS__. */
670 #define INTRA_TEST( name, dir, ... ) \
671 if( ip_a.name[dir] != ip_ref.name[dir] )\
674 memcpy( buf3, buf1, 32*20 );\
675 memcpy( buf4, buf1, 32*20 );\
676 ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
677 ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
678 if( memcmp( buf3, buf4, 32*20 ) )\
680 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
683 for(k=-1; k<16; k++)\
684 printf("%2x ", edge[16+k]);\
687 printf("%2x ", edge[j]);\
689 printf("%2x ", buf4[48+k+j*32]);\
696 printf("%2x ", buf3[48+k+j*32]);\
702 for( i = 0; i < 12; i++ )
703 INTRA_TEST( predict_4x4, i );
704 for( i = 0; i < 7; i++ )
705 INTRA_TEST( predict_8x8c, i );
706 for( i = 0; i < 7; i++ )
707 INTRA_TEST( predict_16x16, i );
708 for( i = 0; i < 12; i++ )
709 INTRA_TEST( predict_8x8, i, edge );
/* 8x8 predictors take the pre-filtered edge buffer as an extra argument */
711 report( "intra pred :" );
/* Run every check_* group for one (reference, new) pair of CPU flag sets.
 * Returns the sum of the group results: 0 means all groups passed. */
715 int check_all( int cpu_ref, int cpu_new )
717 return check_pixel( cpu_ref, cpu_new )
718 + check_dct( cpu_ref, cpu_new )
719 + check_mc( cpu_ref, cpu_new )
720 + check_intra( cpu_ref, cpu_new )
721 + check_deblock( cpu_ref, cpu_new )
722 + check_quant( cpu_ref, cpu_new );
/* Announce one instruction-set step and re-run all checks for it.
 * NOTE(review): the lines that fold `flags` into *cpu_ref/*cpu_new are
 * elided in this extract; presumably the previous *cpu_new becomes the
 * new *cpu_ref before `flags` is OR'ed into *cpu_new -- confirm. */
725 int add_flags( int *cpu_ref, int *cpu_new, int flags, const char *name )
729 fprintf( stderr, "x264: %s\n", name );
730 return check_all( *cpu_ref, *cpu_new );
/* Entry point: allocate the shared test buffers, seed the RNG (from
 * argv[1] if given, so failures are reproducible), fill buf1/buf2 with
 * random pixels, then walk the ladder of CPU capabilities, checking each
 * newly-enabled instruction set against the previous one. */
733 int main(int argc, char *argv[])
736 int cpu0 = 0, cpu1 = 0;
739 buf1 = x264_malloc( 1024 ); /* 32 x 32 */
740 buf2 = x264_malloc( 1024 );
741 buf3 = x264_malloc( 1024 );
742 buf4 = x264_malloc( 1024 );
743 buf5 = x264_malloc( 1024 );
/* NOTE(review): `i` is printed with %u below but appears to be a signed
 * int seeded from x264_mdate() -- benign on common ABIs, but confirm the
 * declaration (elided here) before relying on it. */
745 i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
746 fprintf( stderr, "x264: using random seed %u\n", i );
749 for( i = 0; i < 1024; i++ )
751 buf1[i] = rand() & 0xFF;
752 buf2[i] = rand() & 0xFF;
753 buf3[i] = buf4[i] = 0;
/* test each instruction set against the previous one; cacheline-split
 * variants are toggled in and out via cpu1 between steps */
757 if( x264_cpu_detect() & X264_CPU_MMXEXT )
759 ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMXEXT, "MMXEXT" );
760 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "MMXEXT Cache64" );
761 cpu1 &= ~X264_CPU_CACHELINE_64;
762 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32, "MMXEXT Cache32" );
764 if( x264_cpu_detect() & X264_CPU_SSE2 )
766 cpu1 &= ~(X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32);
767 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE | X264_CPU_SSE2, "SSE2" );
768 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "SSE2 Cache64" );
770 if( x264_cpu_detect() & X264_CPU_SSE3 )
771 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE3, "SSE3" );
772 if( x264_cpu_detect() & X264_CPU_SSSE3 )
774 cpu1 &= ~X264_CPU_CACHELINE_SPLIT;
775 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSSE3, "SSSE3" );
776 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "SSSE3 Cache64" );
/* AltiVec has no incremental ladder: compare directly against plain C */
779 if( x264_cpu_detect() & X264_CPU_ALTIVEC )
781 fprintf( stderr, "x264: ALTIVEC against C\n" );
782 ret = check_all( 0, X264_CPU_ALTIVEC );
788 fprintf( stderr, "x264: All tests passed Yeah :)\n" );
791 fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );