4 #include "common/common.h"
5 #include "common/cpu.h"
7 #include "common/i386/pixel.h"
8 #include "common/i386/dct.h"
9 #include "common/i386/mc.h"
12 #include "common/ppc/pixel.h"
13 #include "common/ppc/mc.h"
16 /* buf1, buf2: initialised to random data; tests should not write into them */
17 uint8_t * buf1, * buf2;
18 /* buf3, buf4: used to store output */
19 uint8_t * buf3, * buf4;
/* report( name ): print one result line for a test group to stderr, showing
 * "OK" or "FAILED" depending on the `ok` variable in the caller's scope.
 * NOTE(review): only part of this macro is visible in this excerpt. */
23 #define report( name ) { \
25 fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \
/* check_pixel: compare the pixel-metric functions selected for cpu_new with
 * the versions selected for cpu flags 0 (pixel_c). A function is exercised
 * only when its pointer differs from the one selected for cpu_ref.
 * Groups visible here: per-entry metric tables (TEST_PIXEL), multi-candidate
 * SAD (TEST_PIXEL_X), intra satd/sa8d x3, SSIM, and "esa ads".
 * Each group is summarised on stderr through report().
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably `ret` is returned - confirm. */
29 static int check_pixel( int cpu_ref, int cpu_new )
31 x264_pixel_function_t pixel_c;
32 x264_pixel_function_t pixel_ref;
33 x264_pixel_function_t pixel_asm;
34 x264_predict_t predict_16x16[4+3];
35 x264_predict_t predict_8x8c[4+3];
36 x264_predict_t predict_4x4[9+3];
37 x264_predict8x8_t predict_8x8[9+3];
38 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
39 int ret = 0, ok, used_asm;
42 x264_pixel_init( 0, &pixel_c );
43 x264_pixel_init( cpu_ref, &pixel_ref );
44 x264_pixel_init( cpu_new, &pixel_asm );
45 x264_predict_16x16_init( 0, predict_16x16 );
46 x264_predict_8x8c_init( 0, predict_8x8c );
47 x264_predict_8x8_init( 0, predict_8x8 );
48 x264_predict_4x4_init( 0, predict_4x4 );
49 x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* TEST_PIXEL( name ): for each of the 7 entries of pixel_*.name[], call the
 * pixel_c and pixel_asm versions on buf1 and buf2+j for j = 0..63 and flag a
 * failure when the two integer results differ. The arguments 32 and 16 are
 * presumably the strides of the two inputs - confirm against the prototype. */
51 #define TEST_PIXEL( name ) \
52 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
55 if( pixel_asm.name[i] != pixel_ref.name[i] ) \
57 for( j=0; j<64; j++ ) \
60 res_c = pixel_c.name[i]( buf1, 32, buf2+j, 16 ); \
61 res_asm = pixel_asm.name[i]( buf1, 32, buf2+j, 16 ); \
62 if( res_c != res_asm ) \
65 fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
71 report( "pixel " #name " :" );
/* TEST_PIXEL_X( N ): check sad_x##N against separate pixel_c.sad[] calls on
 * the candidates pix2, pix2+30, pix2+1 and pix2+99, where pix2 = buf2+j.
 * Entries whose sad_x##N pointer is NULL are skipped. Both result arrays start
 * zeroed and are compared in full with memcmp.
 * NOTE(review): the sad_x4 and sad_x3 calls both appear below; the code that
 * selects between them is not visible in this excerpt. */
78 #define TEST_PIXEL_X( N ) \
79 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
81 int res_c[4]={0}, res_asm[4]={0}; \
82 if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
84 for( j=0; j<64; j++) \
86 uint8_t *pix2 = buf2+j; \
88 res_c[0] = pixel_c.sad[i]( buf1, 16, pix2, 32 ); \
89 res_c[1] = pixel_c.sad[i]( buf1, 16, pix2+30, 32 ); \
90 res_c[2] = pixel_c.sad[i]( buf1, 16, pix2+1, 32 ); \
93 res_c[3] = pixel_c.sad[i]( buf1, 16, pix2+99, 32 ); \
94 pixel_asm.sad_x4[i]( buf1, pix2, pix2+30, pix2+1, pix2+99, 32, res_asm ); \
97 pixel_asm.sad_x3[i]( buf1, pix2, pix2+30, pix2+1, 32, res_asm ); \
98 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
101 fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
102 i, res_c[0], res_c[1], res_c[2], res_c[3], \
103 res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
108 report( "pixel sad_x"#N" :" );
/* TEST_INTRA_SATD( name, pred, satd, i8x8, ... ): build the reference by
 * copying buf2 into buf3, running pred[0..2] on buf3+40 and measuring each
 * prediction against buf1+40 with pixel_c.<satd>. The three results are then
 * compared with a single pixel_asm.name call, which receives `edge` instead
 * of buf3+40 when i8x8 is non-zero. */
113 #define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \
114 if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
116 int res_c[3], res_asm[3]; \
118 memcpy( buf3, buf2, 1024 ); \
119 for( i=0; i<3; i++ ) \
121 pred[i]( buf3+40, ##__VA_ARGS__ ); \
122 res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
124 pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
125 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
128 fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
129 res_c[0], res_c[1], res_c[2], \
130 res_asm[0], res_asm[1], res_asm[2] ); \
134 ok = 1; used_asm = 0;
135 TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );
136 TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );
137 TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );
138 TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );
139 report( "intra satd_x3 :" );
/* SSIM: runs when either SSIM helper pointer differs from the cpu_ref one.
 * The two x264_pixel_ssim_wxh results may differ by at most 1e-8. */
141 if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||
142 pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
146 x264_cpu_restore( cpu_new );
147 res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
148 res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
149 if( fabs(res_c - res_a) > 1e-8 )
152 fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
/* ads: sums[] and dc[] are filled with random values masked to 14 bits
 * (0x3fff); the C and asm outputs (32 uint16_t each) must be identical. */
157 ok = 1; used_asm = 0;
159 if( pixel_asm.ads[i] != pixel_ref.ads[i] )
161 uint16_t res_a[32], res_c[32];
164 for( j=0; j<72; j++ )
165 sums[j] = rand() & 0x3fff;
167 dc[j] = rand() & 0x3fff;
169 pixel_c.ads[i]( dc, sums, 32, res_c, 32 );
170 pixel_asm.ads[i]( dc, sums, 32, res_a, 32 );
171 if( memcmp(res_a, res_c, sizeof(res_c)) )
174 report( "esa ads:" );
/* check_dct: compare the forward/inverse DCT, DC-transform and zigzag
 * functions selected for cpu_new with the versions selected for cpu flags 0.
 * A function is exercised only when its pointer differs from the one
 * selected for cpu_ref. Groups are reported as sub_dct4, sub_dct8, add_idct4,
 * add_idct8, (i)dct4x4dc, (i)dct2x2dc, zigzag_frame and zigzag_field.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably `ret` is returned - confirm. */
179 static int check_dct( int cpu_ref, int cpu_new )
181 x264_dct_function_t dct_c;
182 x264_dct_function_t dct_ref;
183 x264_dct_function_t dct_asm;
184 int ret = 0, ok, used_asm;
185 int16_t dct1[16][4][4] __attribute__((aligned(16)));
186 int16_t dct2[16][4][4] __attribute__((aligned(16)));
188 x264_dct_init( 0, &dct_c );
189 x264_dct_init( cpu_ref, &dct_ref);
190 x264_dct_init( cpu_new, &dct_asm );
/* TEST_DCT( name, t1, t2, size ): run dct_c.name into t1 and dct_asm.name
 * into t2, both from buf1/buf2, then compare the first `size` bytes. */
191 #define TEST_DCT( name, t1, t2, size ) \
192 if( dct_asm.name != dct_ref.name ) \
195 dct_c.name( t1, buf1, buf2 ); \
196 dct_asm.name( t2, buf1, buf2 ); \
197 if( memcmp( t1, t2, size ) ) \
200 fprintf( stderr, #name " [FAILED]\n" ); \
203 ok = 1; used_asm = 0;
204 TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
205 TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
206 TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
207 report( "sub_dct4 :" );
209 ok = 1; used_asm = 0;
210 TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
211 TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
212 report( "sub_dct8 :" );
215 /* copy coefs because idct8 modifies them in place */
216 memcpy( buf5, dct1, 512 );
/* TEST_IDCT( name ): reset buf3/buf4 from buf1 and dct1/dct2 from the saved
 * coefficients in buf5, run the C version into buf3 and the asm version into
 * buf4, then compare the full 32*32-byte output buffers. */
218 #define TEST_IDCT( name ) \
219 if( dct_asm.name != dct_ref.name ) \
222 memcpy( buf3, buf1, 32*32 ); \
223 memcpy( buf4, buf1, 32*32 ); \
224 memcpy( dct1, buf5, 512 ); \
225 memcpy( dct2, buf5, 512 ); \
226 dct_c.name( buf3, (void*)dct1 ); \
227 dct_asm.name( buf4, (void*)dct2 ); \
228 if( memcmp( buf3, buf4, 32*32 ) ) \
231 fprintf( stderr, #name " [FAILED]\n" ); \
234 ok = 1; used_asm = 0;
235 TEST_IDCT( add4x4_idct );
236 TEST_IDCT( add8x8_idct );
237 TEST_IDCT( add16x16_idct );
238 report( "add_idct4 :" );
240 ok = 1; used_asm = 0;
241 TEST_IDCT( add8x8_idct8 );
242 TEST_IDCT( add16x16_idct8 );
243 report( "add_idct8 :" );
246 ok = 1; used_asm = 0;
247 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
249 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
250 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
252 dct_c.dct4x4dc( dct1 );
253 dct_asm.dct4x4dc( dct2 );
254 if( memcmp( dct1, dct2, 32 ) )
257 fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
/* NOTE(review): the guard below compares the dct4x4dc pointers, but the block
 * it protects exercises idct4x4dc. It looks like it should compare
 * dct_asm.idct4x4dc with dct_ref.idct4x4dc (the 2x2 case further down does
 * so for idct2x2dc) - confirm and fix. */
260 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
262 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
263 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
265 dct_c.idct4x4dc( dct1 );
266 dct_asm.idct4x4dc( dct2 );
267 if( memcmp( dct1, dct2, 32 ) )
270 fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
273 report( "(i)dct4x4dc :" );
275 ok = 1; used_asm = 0;
276 if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
278 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
279 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
281 dct_c.dct2x2dc( dct1 );
282 dct_asm.dct2x2dc( dct2 );
283 if( memcmp( dct1, dct2, 4*2 ) )
286 fprintf( stderr, " - dct2x2dc : [FAILED]\n" );
289 if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
291 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
292 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
294 dct_c.idct2x2dc( dct1 );
295 dct_asm.idct2x2dc( dct2 );
296 if( memcmp( dct1, dct2, 4*2 ) )
299 fprintf( stderr, " - idct2x2dc : [FAILED]\n" );
302 report( "(i)dct2x2dc :" );
304 x264_zigzag_function_t zigzag_c;
305 x264_zigzag_function_t zigzag_ref;
306 x264_zigzag_function_t zigzag_asm;
308 int32_t level1[64] __attribute__((aligned(16)));
309 int32_t level2[64] __attribute__((aligned(16)));
/* Zigzag checks. TEST_ZIGZAG_SCAN compares the `size`-byte outputs of the C
 * and asm scan of the same dct input. TEST_ZIGZAG_SUB resets buf3/buf4 from
 * buf1, runs both versions against buf2, and compares both the level output
 * and the 16*FDEC_STRIDE bytes of buf3/buf4. The tables are initialised twice,
 * with a final x264_zigzag_init argument of 0 (reported as "zigzag_frame")
 * and of 1 (reported as "zigzag_field"). */
311 #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
312 if( zigzag_asm.name != zigzag_ref.name ) \
315 zigzag_c.name( t1, dct ); \
316 zigzag_asm.name( t2, dct ); \
317 if( memcmp( t1, t2, size ) ) \
320 fprintf( stderr, #name " [FAILED]\n" ); \
324 #define TEST_ZIGZAG_SUB( name, t1, t2, size ) \
325 if( zigzag_asm.name != zigzag_ref.name ) \
328 memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
329 memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
330 zigzag_c.name( t1, buf2, buf3 ); \
331 zigzag_asm.name( t2, buf2, buf4 ); \
332 if( memcmp( t1, t2, size )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) ) \
335 fprintf( stderr, #name " [FAILED]\n" ); \
339 x264_zigzag_init( 0, &zigzag_c, 0 );
340 x264_zigzag_init( cpu_ref, &zigzag_ref, 0 );
341 x264_zigzag_init( cpu_new, &zigzag_asm, 0 );
343 ok = 1; used_asm = 0;
344 TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64*4 );
345 TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16*4 );
346 TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15*4 );
347 TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16*4 );
348 TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15*4 );
349 report( "zigzag_frame :" );
351 x264_zigzag_init( 0, &zigzag_c, 1 );
352 x264_zigzag_init( cpu_ref, &zigzag_ref, 1 );
353 x264_zigzag_init( cpu_new, &zigzag_asm, 1 );
355 ok = 1; used_asm = 0;
356 TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64*4 );
357 TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16*4 );
358 TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15*4 );
359 TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16*4 );
360 TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15*4 );
361 report( "zigzag_field :" );
362 #undef TEST_ZIGZAG_SCAN
363 #undef TEST_ZIGZAG_SUB
/* check_mc: compare the motion-compensation functions selected for cpu_new
 * with the versions selected for cpu flags 0. A function is exercised only
 * when its pointer differs from the one selected for cpu_ref.
 * Sources are taken from buf1 and outputs go to buf3 (C) and buf4 (asm), each
 * at offset 2*32+2. Luma is swept over dx,dy in [-8,7] and chroma over
 * dx,dy in [-1,8]; avg covers 10 table entries; avg_weight is swept over
 * w in [-64,128], stopping early once `ok` is cleared.
 * MC_TEST_CHROMA (defined further down) copies dst1 columns w..3 into dst2
 * before comparing, so bytes written there by the asm version are ignored.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably `ret` is returned - confirm. */
368 static int check_mc( int cpu_ref, int cpu_new )
370 x264_mc_functions_t mc_c;
371 x264_mc_functions_t mc_ref;
372 x264_mc_functions_t mc_a;
373 x264_pixel_function_t pixel;
375 uint8_t *src = &buf1[2*32+2];
376 uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2],
377 &buf1[12*32+2], &buf1[17*32+2] };
378 uint8_t *dst1 = &buf3[2*32+2];
379 uint8_t *dst2 = &buf4[2*32+2];
382 int ret = 0, ok, used_asm;
384 x264_mc_init( 0, &mc_c );
385 x264_mc_init( cpu_ref, &mc_ref );
386 x264_mc_init( cpu_new, &mc_a );
387 x264_pixel_init( 0, &pixel );
/* MC_TEST_LUMA( w, h ): two checks for the current dx,dy.
 * 1) mc_luma: both output buffers are filled with 0xCD, the C and asm
 *    versions are run, and the complete 1024-byte buffers are compared, so
 *    differing writes outside the w x h block are also detected.
 * 2) get_ref: the block returned by mc_a.get_ref (pointer and stride may be
 *    updated by the call) is compared with the C mc_luma output through
 *    pixel.sad; any non-zero SAD is a failure. */
389 #define MC_TEST_LUMA( w, h ) \
390 if( mc_a.mc_luma != mc_ref.mc_luma ) \
393 memset(buf3, 0xCD, 1024); \
394 memset(buf4, 0xCD, 1024); \
395 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
396 mc_a.mc_luma( dst2, 16, src2, 32, dx, dy, w, h ); \
397 if( memcmp( buf3, buf4, 1024 ) ) \
399 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
403 if( mc_a.get_ref != mc_ref.get_ref ) \
405 uint8_t *ref = dst2; \
406 int ref_stride = 16; \
408 memset(buf3, 0xCD, 1024); \
409 memset(buf4, 0xCD, 1024); \
410 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
411 ref = mc_a.get_ref( ref, &ref_stride, src2, 32, dx, dy, w, h ); \
412 if( pixel.sad[PIXEL_##w##x##h]( dst1, 16, ref, ref_stride ) ) \
414 fprintf( stderr, "get_ref[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
419 #define MC_TEST_CHROMA( w, h ) \
420 if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
423 memset(buf3, 0xCD, 1024); \
424 memset(buf4, 0xCD, 1024); \
425 mc_c.mc_chroma( dst1, 16, src, 32, dx, dy, w, h ); \
426 mc_a.mc_chroma( dst2, 16, src, 32, dx, dy, w, h ); \
427 /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
428 for( j=0; j<h; j++ ) \
429 for( i=w; i<4; i++ ) \
430 dst2[i+j*16] = dst1[i+j*16]; \
431 if( memcmp( buf3, buf4, 1024 ) ) \
433 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
437 ok = 1; used_asm = 0;
438 for( dy = -8; dy < 8; dy++ )
439 for( dx = -8; dx < 8; dx++ )
441 MC_TEST_LUMA( 16, 16 );
442 MC_TEST_LUMA( 16, 8 );
443 MC_TEST_LUMA( 8, 16 );
444 MC_TEST_LUMA( 8, 8 );
445 MC_TEST_LUMA( 8, 4 );
446 MC_TEST_LUMA( 4, 8 );
447 MC_TEST_LUMA( 4, 4 );
449 report( "mc luma :" );
451 ok = 1; used_asm = 0;
452 for( dy = -1; dy < 9; dy++ )
453 for( dx = -1; dx < 9; dx++ )
455 MC_TEST_CHROMA( 8, 8 );
456 MC_TEST_CHROMA( 8, 4 );
457 MC_TEST_CHROMA( 4, 8 );
458 MC_TEST_CHROMA( 4, 4 );
459 MC_TEST_CHROMA( 4, 2 );
460 MC_TEST_CHROMA( 2, 4 );
461 MC_TEST_CHROMA( 2, 2 );
463 report( "mc chroma :" );
465 #undef MC_TEST_CHROMA
/* MC_TEST_AVG( name, ... ): for each of the 10 entries of mc_*.name[], reset
 * buf3/buf4 from buf1, run the C version on buf3 and the asm version on buf4
 * (both reading buf2, with any extra arguments forwarded) and compare the
 * full 1024-byte buffers. Note that the macro resets ok/used_asm itself. */
467 #define MC_TEST_AVG( name, ... ) \
468 for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \
470 memcpy( buf3, buf1, 1024 ); \
471 memcpy( buf4, buf1, 1024 ); \
472 if( mc_a.name[i] != mc_ref.name[i] ) \
475 mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
476 mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
477 if( memcmp( buf3, buf4, 1024 ) ) \
480 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
485 report( "mc avg :" );
486 ok = 1; used_asm = 0;
487 for( w = -64; w <= 128 && ok; w++ )
488 MC_TEST_AVG( avg_weight, w );
489 report( "mc wpredb :" );
/* check_deblock: compare the deblocking filters selected for cpu_new with the
 * versions selected for cpu flags 0, over 36 alpha/beta/tc parameter sets.
 * A filter is exercised only when its pointer differs from the one selected
 * for cpu_ref. All eight filters share a single "deblock" report line.
 * NOTE(review): TEST_DEBLOCK refills buf1 with fresh random data, although
 * the comment at the buffer declarations says buf1 should not be written;
 * checks that run after this one see the modified contents.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably `ret` is returned - confirm. */
494 static int check_deblock( int cpu_ref, int cpu_new )
496 x264_deblock_function_t db_c;
497 x264_deblock_function_t db_ref;
498 x264_deblock_function_t db_a;
499 int ret = 0, ok = 1, used_asm = 0;
500 int alphas[36], betas[36];
504 x264_deblock_init( 0, &db_c );
505 x264_deblock_init( cpu_ref, &db_ref );
506 x264_deblock_init( cpu_new, &db_a );
508 /* not exactly the real values of a,b,tc but close enough */
510 for( i = 35; i >= 0; i-- )
514 tcs[i][0] = tcs[i][2] = (c+6)/10;
515 tcs[i][1] = tcs[i][3] = (c+9)/20;
/* TEST_DEBLOCK( name, ... ): for each of the 36 parameter sets, fill buf1
 * with random bytes (masked to 0xf for odd i, 0xff for even i), copy it to
 * buf3 and buf4, run the C filter on buf3 and the asm filter on buf4 at
 * offset 8*32, and compare the full 1024-byte buffers. */
520 #define TEST_DEBLOCK( name, ... ) \
521 for( i = 0; i < 36; i++ ) \
523 for( j = 0; j < 1024; j++ ) \
524 /* two distributions of random data to exercise different failure modes */\
525 buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
526 memcpy( buf3, buf1, 1024 ); \
527 memcpy( buf4, buf1, 1024 ); \
528 if( db_a.name != db_ref.name ) \
531 db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
532 db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
533 if( memcmp( buf3, buf4, 1024 ) ) \
536 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
542 TEST_DEBLOCK( deblock_h_luma, tcs[i] );
543 TEST_DEBLOCK( deblock_v_luma, tcs[i] );
544 TEST_DEBLOCK( deblock_h_chroma, tcs[i] );
545 TEST_DEBLOCK( deblock_v_chroma, tcs[i] );
546 TEST_DEBLOCK( deblock_h_luma_intra );
547 TEST_DEBLOCK( deblock_v_luma_intra );
548 TEST_DEBLOCK( deblock_h_chroma_intra );
549 TEST_DEBLOCK( deblock_v_chroma_intra );
551 report( "deblock :" );
/* check_quant: compare the quantisation and dequantisation functions selected
 * for cpu_new with the versions selected for cpu flags 0, for i_cqm = 0..3.
 * The visible branches install x264_cqm_flat16, x264_cqm_jvt[] or the random
 * cqm_buf (values 10..255) as the six scaling lists. A function is exercised
 * only when its pointer differs from the one selected for cpu_ref. Every test
 * sweeps qp from 51 down to 1. Results are gathered in oks[]/used_asms[],
 * with index 1 feeding the "dequant" report line.
 * NOTE(review): the `cqm` parameter of TEST_QUANT_DC is not referenced by any
 * visible line of that macro; it always uses the CQM_4IY tables.
 * NOTE(review): the declaration of `h` and the return statement are not
 * visible in this excerpt. */
556 static int check_quant( int cpu_ref, int cpu_new )
558 x264_quant_function_t qf_c;
559 x264_quant_function_t qf_ref;
560 x264_quant_function_t qf_a;
561 int16_t dct1[64] __attribute__((__aligned__(16)));
562 int16_t dct2[64] __attribute__((__aligned__(16)));
563 uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
564 int ret = 0, ok, used_asm;
565 int oks[2] = {1,1}, used_asms[2] = {0,0};
569 h->pps = h->pps_array;
570 x264_param_default( &h->param );
571 h->param.rc.i_qp_min = 26;
573 for( i_cqm = 0; i_cqm < 4; i_cqm++ )
576 for( i = 0; i < 6; i++ )
577 h->pps->scaling_list[i] = x264_cqm_flat16;
578 else if( i_cqm == 1 )
579 for( i = 0; i < 6; i++ )
580 h->pps->scaling_list[i] = x264_cqm_jvt[i];
584 for( i = 0; i < 64; i++ )
585 cqm_buf[i] = 10 + rand() % 246;
587 for( i = 0; i < 64; i++ )
589 for( i = 0; i < 6; i++ )
590 h->pps->scaling_list[i] = cqm_buf;
594 x264_quant_init( h, 0, &qf_c );
595 x264_quant_init( h, cpu_ref, &qf_ref );
596 x264_quant_init( h, cpu_new, &qf_a );
/* INIT_QUANT8 / INIT_QUANT4: fill dct1 and dct2 with the same random
 * coefficients, each drawn from [-scale, +scale], where scale is derived from
 * the per-row and per-column scale1d[] factors. */
598 #define INIT_QUANT8() \
600 static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \
602 for( y = 0; y < 8; y++ ) \
603 for( x = 0; x < 8; x++ ) \
605 unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \
606 dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \
610 #define INIT_QUANT4() \
612 static const int scale1d[4] = {4,6,4,6}; \
614 for( y = 0; y < 4; y++ ) \
615 for( x = 0; x < 4; x++ ) \
617 unsigned int scale = 255*scale1d[y]*scale1d[x]; \
618 dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \
622 #define TEST_QUANT_DC( name, cqm ) \
623 if( qf_a.name != qf_ref.name ) \
626 for( qp = 51; qp > 0; qp-- ) \
628 for( i = 0; i < 16; i++ ) \
629 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
630 qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
631 qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
632 if( memcmp( dct1, dct2, 16*2 ) ) \
635 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
641 #define TEST_QUANT( qname, block, w ) \
642 if( qf_a.qname != qf_ref.qname ) \
645 for( qp = 51; qp > 0; qp-- ) \
648 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
649 qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
650 if( memcmp( dct1, dct2, w*w*2 ) ) \
653 fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
659 TEST_QUANT( quant_8x8, CQM_8IY, 8 );
660 TEST_QUANT( quant_8x8, CQM_8PY, 8 );
661 TEST_QUANT( quant_4x4, CQM_4IY, 4 );
662 TEST_QUANT( quant_4x4, CQM_4PY, 4 );
663 TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] );
664 TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );
/* TEST_DEQUANT( qname, dqname, block, w ): quantise dct1 with the C quantiser,
 * copy the result to dct2, then dequantise dct1 with the C version and dct2
 * with the asm version and compare w*w coefficients. */
666 #define TEST_DEQUANT( qname, dqname, block, w ) \
667 if( qf_a.dqname != qf_ref.dqname ) \
670 for( qp = 51; qp > 0; qp-- ) \
673 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
674 memcpy( dct2, dct1, w*w*2 ); \
675 qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
676 qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
677 if( memcmp( dct1, dct2, w*w*2 ) ) \
680 fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
686 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 );
687 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 );
688 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
689 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
692 ok = oks[0]; used_asm = used_asms[0];
695 ok = oks[1]; used_asm = used_asms[1];
696 report( "dequant :" );
/* check_intra: compare the intra-prediction functions selected for cpu_new
 * with the versions selected for cpu flags 0. Three predictor tables are
 * built (ip_c for flags 0, ip_ref for cpu_ref, ip_a for cpu_new), and a
 * predictor is exercised only when its ip_a pointer differs from ip_ref.
 * The loops cover 12 predict_4x4, 7 predict_8x8c, 7 predict_16x16 and
 * 12 predict_8x8 entries; predict_8x8 also receives the filtered `edge`
 * array computed from buf1+48. All results share one "intra pred" line.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably `ret` is returned - confirm. */
701 static int check_intra( int cpu_ref, int cpu_new )
703 int ret = 0, ok = 1, used_asm = 0;
705 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
708 x264_predict_t predict_16x16[4+3];
709 x264_predict_t predict_8x8c[4+3];
710 x264_predict8x8_t predict_8x8[9+3];
711 x264_predict_t predict_4x4[9+3];
712 } ip_c, ip_ref, ip_a;
714 x264_predict_16x16_init( 0, ip_c.predict_16x16 );
715 x264_predict_8x8c_init( 0, ip_c.predict_8x8c );
716 x264_predict_8x8_init( 0, ip_c.predict_8x8 );
717 x264_predict_4x4_init( 0, ip_c.predict_4x4 );
719 x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 );
720 x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c );
721 x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 );
722 x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 );
724 x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 );
725 x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c );
726 x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 );
727 x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
729 x264_predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* INTRA_TEST( name, dir, ... ): copy the first 32*20 bytes of buf1 into buf3
 * and buf4, run the C predictor on buf3+48 and the asm predictor on buf4+48,
 * and compare those 32*20 bytes. On a mismatch the failure is logged to
 * stderr, and the edge values and both outputs are dumped with printf. */
731 #define INTRA_TEST( name, dir, ... ) \
732 if( ip_a.name[dir] != ip_ref.name[dir] )\
735 memcpy( buf3, buf1, 32*20 );\
736 memcpy( buf4, buf1, 32*20 );\
737 ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
738 ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
739 if( memcmp( buf3, buf4, 32*20 ) )\
741 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
744 for(k=-1; k<16; k++)\
745 printf("%2x ", edge[16+k]);\
748 printf("%2x ", edge[j]);\
750 printf("%2x ", buf4[48+k+j*32]);\
757 printf("%2x ", buf3[48+k+j*32]);\
763 for( i = 0; i < 12; i++ )
764 INTRA_TEST( predict_4x4, i );
765 for( i = 0; i < 7; i++ )
766 INTRA_TEST( predict_8x8c, i );
767 for( i = 0; i < 7; i++ )
768 INTRA_TEST( predict_16x16, i );
769 for( i = 0; i < 12; i++ )
770 INTRA_TEST( predict_8x8, i, edge );
772 report( "intra pred :" );
/* check_all: run every check (pixel, dct, mc, intra, deblock, quant) for the
 * given pair of cpu flag sets and return the sum of their results.
 * The evaluation order of the operands of `+` is unspecified in C, so the
 * checks are not guaranteed to run in the order written below. */
776 int check_all( int cpu_ref, int cpu_new )
778 return check_pixel( cpu_ref, cpu_new )
779 + check_dct( cpu_ref, cpu_new )
780 + check_mc( cpu_ref, cpu_new )
781 + check_intra( cpu_ref, cpu_new )
782 + check_deblock( cpu_ref, cpu_new )
783 + check_quant( cpu_ref, cpu_new );
/* add_flags: announce the test pass `name` on stderr and return the result of
 * check_all( *cpu_ref, *cpu_new ).
 * NOTE(review): the statements that apply `flags` to *cpu_ref / *cpu_new are
 * not visible in this excerpt; presumably *cpu_ref takes the previous
 * *cpu_new value and `flags` is OR-ed into *cpu_new - confirm. */
786 int add_flags( int *cpu_ref, int *cpu_new, int flags, const char *name )
790 fprintf( stderr, "x264: %s\n", name );
791 return check_all( *cpu_ref, *cpu_new );
/* main: entry point of the self-check.
 * Allocates five 1024-byte work buffers, picks a random seed (argv[1] if
 * given, otherwise x264_mdate()) and prints it, fills buf1/buf2 with random
 * bytes and zeroes buf3/buf4, then runs the checks for each instruction-set
 * level reported by x264_cpu_detect(): MMXEXT, SSE2, SSE3 and SSSE3 with
 * their cache-line variants, and ALTIVEC against C. A final pass or fail
 * message is printed on stderr.
 * NOTE(review): the x264_malloc results are used without a NULL check and
 * the buffers are not freed in the visible lines.
 * NOTE(review): atoi() gives no error indication for a malformed seed
 * argument; strtol() would allow validation.
 * NOTE(review): the call that applies the seed and the return statements are
 * not visible in this excerpt. */
794 int main(int argc, char *argv[])
797 int cpu0 = 0, cpu1 = 0;
800 buf1 = x264_malloc( 1024 ); /* 32 x 32 */
801 buf2 = x264_malloc( 1024 );
802 buf3 = x264_malloc( 1024 );
803 buf4 = x264_malloc( 1024 );
804 buf5 = x264_malloc( 1024 );
806 i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
807 fprintf( stderr, "x264: using random seed %u\n", i );
810 for( i = 0; i < 1024; i++ )
812 buf1[i] = rand() & 0xFF;
813 buf2[i] = rand() & 0xFF;
814 buf3[i] = buf4[i] = 0;
818 if( x264_cpu_detect() & X264_CPU_MMXEXT )
820 ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMXEXT, "MMXEXT" );
821 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "MMXEXT Cache64" );
822 cpu1 &= ~X264_CPU_CACHELINE_64;
823 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32, "MMXEXT Cache32" );
825 if( x264_cpu_detect() & X264_CPU_SSE2 )
827 cpu1 &= ~(X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32);
828 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE | X264_CPU_SSE2, "SSE2" );
829 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "SSE2 Cache64" );
831 if( x264_cpu_detect() & X264_CPU_SSE3 )
832 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE3, "SSE3" );
833 if( x264_cpu_detect() & X264_CPU_SSSE3 )
835 cpu1 &= ~X264_CPU_CACHELINE_SPLIT;
836 ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSSE3, "SSSE3" );
837 ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64, "SSSE3 Cache64" );
840 if( x264_cpu_detect() & X264_CPU_ALTIVEC )
842 fprintf( stderr, "x264: ALTIVEC against C\n" );
843 ret = check_all( 0, X264_CPU_ALTIVEC );
849 fprintf( stderr, "x264: All tests passed Yeah :)\n" );
852 fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );