5 #include "common/common.h"
6 #include "common/cpu.h"
8 #include "common/i386/pixel.h"
9 #include "common/i386/dct.h"
10 #include "common/i386/mc.h"
13 #include "common/ppc/pixel.h"
14 #include "common/ppc/mc.h"
17 /* buf1, buf2: input buffers filled with random bytes by main(); the tests read from them and are not meant to modify them (check_deblock is the exception: it refills buf1 before every run) */
18 uint8_t * buf1, * buf2;
19 /* buf3, buf4: output buffers; the C version writes into one and the version under test into the other, then the two are compared with memcmp */
20 uint8_t * buf3, * buf4;
/*
 * report( name ): print the one-line summary " - <name> [OK]" or
 * " - <name> [FAILED]" for a test group on stderr, depending on `ok`.
 * Must be expanded in a scope where `ok` is defined.
 * NOTE(review): only part of the macro body is visible here; any other
 * effect it has (e.g. on `ret` or `used_asm`) should be confirmed against
 * the full definition.
 */
24 #define report( name ) { \
26 fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \
/*
 * check_pixel: compare the pixel-metric functions selected for `cpu_new`
 * against the plain C implementations (table initialised with cpu flags 0).
 *
 *   cpu_ref - cpu flags of a baseline whose functions are already trusted;
 *             the comparison is guarded by "pointer for cpu_new differs from
 *             pointer for cpu_ref", so functions shared with the baseline are
 *             not re-tested
 *   cpu_new - cpu flags of the implementation under test
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * function returns `ret` (initialised to 0) - confirm.
 */
30 static int check_pixel( int cpu_ref, int cpu_new )
32 x264_pixel_function_t pixel_c;
33 x264_pixel_function_t pixel_ref;
34 x264_pixel_function_t pixel_asm;
35 int ret = 0, ok, used_asm;
/* Three function tables: C reference, trusted baseline, implementation under test. */
38 x264_pixel_init( 0, &pixel_c );
39 x264_pixel_init( cpu_ref, &pixel_ref );
40 x264_pixel_init( cpu_new, &pixel_asm );
/*
 * TEST_PIXEL( name ): walk the 7 entries of table member `name`, call the C
 * and the tested function on the same inputs (buf1 with 32, buf2 with 24 -
 * presumably the row strides) and compare the two integer results; a
 * mismatch is logged with both values. Ends by reporting the group.
 */
42 #define TEST_PIXEL( name ) \
43 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
46 if( pixel_asm.name[i] != pixel_ref.name[i] ) \
49 res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
50 res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
51 if( res_c != res_asm ) \
54 fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
58 report( "pixel " #name " :" );
/*
 * check_dct: compare the forward/inverse transform functions selected for
 * `cpu_new` against the plain C implementations.
 *
 *   cpu_ref - cpu flags of the trusted baseline; each test is guarded by a
 *             pointer comparison between the cpu_new and cpu_ref tables
 *   cpu_new - cpu flags of the implementation under test
 *
 * Groups reported: sub_dct4, sub_dct8, add_idct4, add_idct8, (i)dct4x4dc,
 * (i)dct2x2dc.
 * NOTE(review): the return statement is not visible here; presumably
 * returns `ret` - confirm.
 */
67 static int check_dct( int cpu_ref, int cpu_new )
69 x264_dct_function_t dct_c;
70 x264_dct_function_t dct_ref;
71 x264_dct_function_t dct_asm;
72 int ret = 0, ok, used_asm;
73 int16_t dct1[16][4][4] __attribute((aligned(16)));
74 int16_t dct2[16][4][4] __attribute((aligned(16)));
/* Three function tables: C reference, trusted baseline, implementation under test. */
76 x264_dct_init( 0, &dct_c );
77 x264_dct_init( cpu_ref, &dct_ref);
78 x264_dct_init( cpu_new, &dct_asm );
/*
 * TEST_DCT( name, t1, t2, size ): run the C and the tested forward transform
 * on buf1/buf2, writing coefficients to t1 and t2 respectively, then compare
 * the first `size` bytes of both outputs.
 */
79 #define TEST_DCT( name, t1, t2, size ) \
80 if( dct_asm.name != dct_ref.name ) \
83 dct_c.name( t1, buf1, 32, buf2, 24 ); \
84 dct_asm.name( t2, buf1, 32, buf2, 24 ); \
85 if( memcmp( t1, t2, size ) ) \
88 fprintf( stderr, #name " [FAILED]\n" ); \
92 TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
93 TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
94 TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
95 report( "sub_dct4 :" );
/* 8x8-transform variants reuse the same coefficient storage through void* casts; sizes are 64 int16_t per block. */
98 TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
99 TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
100 report( "sub_dct8 :" );
103 /* copy coefs because idct8 modifies them in place */
104 memcpy( buf5, dct1, 512 );
/*
 * TEST_IDCT( name ): start both output buffers from the same pixels (buf1),
 * restore both coefficient arrays from the saved copy in buf5, run the C and
 * the tested inverse transform, and compare the full 32x32 outputs.
 */
106 #define TEST_IDCT( name ) \
107 if( dct_asm.name != dct_ref.name ) \
110 memcpy( buf3, buf1, 32*32 ); \
111 memcpy( buf4, buf1, 32*32 ); \
112 memcpy( dct1, buf5, 512 ); \
113 memcpy( dct2, buf5, 512 ); \
114 dct_c.name( buf3, 32, (void*)dct1 ); \
115 dct_asm.name( buf4, 32, (void*)dct2 ); \
116 if( memcmp( buf3, buf4, 32*32 ) ) \
119 fprintf( stderr, #name " [FAILED]\n" ); \
122 ok = 1; used_asm = 0;
123 TEST_IDCT( add4x4_idct );
124 TEST_IDCT( add8x8_idct );
125 TEST_IDCT( add16x16_idct );
126 report( "add_idct4 :" );
/* Inverse 8x8-transform variants form their own report group. */
128 ok = 1; used_asm = 0;
129 TEST_IDCT( add8x8_idct8 );
130 TEST_IDCT( add16x16_idct8 );
131 report( "add_idct8 :" );
/* 4x4 DC transforms: both implementations work in place on identical fixed sample blocks (32 bytes compared). */
134 ok = 1; used_asm = 0;
135 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
137 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
138 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
140 dct_c.dct4x4dc( dct1 );
141 dct_asm.dct4x4dc( dct2 );
142 if( memcmp( dct1, dct2, 32 ) )
145 fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
/*
 * NOTE(review): the guard below compares the dct4x4dc pointers, but the code
 * it guards exercises idct4x4dc. This looks like a copy/paste slip - an
 * idct4x4dc that differs from the baseline is skipped whenever dct4x4dc is
 * unchanged. The 2x2 counterpart further down does compare idct2x2dc.
 * Expected guard: dct_asm.idct4x4dc != dct_ref.idct4x4dc - confirm and fix.
 */
148 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
150 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
151 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
153 dct_c.idct4x4dc( dct1 );
154 dct_asm.idct4x4dc( dct2 );
155 if( memcmp( dct1, dct2, 32 ) )
158 fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
161 report( "(i)dct4x4dc :" );
/* 2x2 DC transforms: same scheme on fixed 2x2 sample blocks (4*2 bytes compared). */
163 ok = 1; used_asm = 0;
164 if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
166 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
167 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
169 dct_c.dct2x2dc( dct1 );
170 dct_asm.dct2x2dc( dct2 );
171 if( memcmp( dct1, dct2, 4*2 ) )
174 fprintf( stderr, " - dct2x2dc : [FAILED]\n" );
/* Inverse 2x2 DC transform, guarded by its own pointer comparison. */
177 if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
179 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
180 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
182 dct_c.idct2x2dc( dct1 );
183 dct_asm.idct2x2dc( dct2 );
184 if( memcmp( dct1, dct2, 4*2 ) )
187 fprintf( stderr, " - idct2x2dc : [FAILED]\n" );
190 report( "(i)dct2x2dc :" );
/*
 * check_mc: compare the motion-compensation functions selected for `cpu_new`
 * (mc_luma, mc_chroma and the avg / avg_weight tables) against the plain C
 * implementations.
 *
 *   cpu_ref - cpu flags of the trusted baseline; each test is guarded by a
 *             pointer comparison between the cpu_new and cpu_ref tables
 *   cpu_new - cpu flags of the implementation under test
 *
 * NOTE(review): the return statement is not visible here; presumably
 * returns `ret` - confirm.
 */
195 static int check_mc( int cpu_ref, int cpu_new )
197 x264_mc_functions_t mc_c;
198 x264_mc_functions_t mc_ref;
199 x264_mc_functions_t mc_a;
/* Source and destination pointers are offset 2 rows + 2 bytes into the 32-byte-wide buffers; src2 holds four source pointers, 5 rows apart, inside buf1. */
201 uint8_t *src = &buf1[2*32+2];
202 uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2],
203 &buf1[12*32+2], &buf1[17*32+2] };
204 uint8_t *dst1 = &buf3[2*32+2];
205 uint8_t *dst2 = &buf4[2*32+2];
208 int ret = 0, ok, used_asm;
/* Three function tables: C reference, trusted baseline, implementation under test. */
210 x264_mc_init( 0, &mc_c );
211 x264_mc_init( cpu_ref, &mc_ref );
212 x264_mc_init( cpu_new, &mc_a );
/*
 * MC_TEST_LUMA( w, h ): pre-fill both output buffers with 0xCD, run the C and
 * the tested mc_luma for the current (dx,dy) and block size, then compare the
 * entire 1024-byte buffers, so bytes written outside the destination block
 * also show up as a mismatch. MC_TEST_CHROMA below follows the same scheme
 * with mc_chroma and the single source pointer `src`.
 */
214 #define MC_TEST_LUMA( w, h ) \
215 if( mc_a.mc_luma != mc_ref.mc_luma ) \
218 memset(buf3, 0xCD, 1024); \
219 memset(buf4, 0xCD, 1024); \
220 mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h ); \
221 mc_a.mc_luma( src2, 32, dst2, 16, dx, dy, w, h ); \
222 if( memcmp( buf3, buf4, 1024 ) ) \
224 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
229 #define MC_TEST_CHROMA( w, h ) \
230 if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
233 memset(buf3, 0xCD, 1024); \
234 memset(buf4, 0xCD, 1024); \
235 mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h ); \
236 mc_a.mc_chroma( src, 32, dst2, 16, dx, dy, w, h ); \
237 if( memcmp( buf3, buf4, 1024 ) ) \
239 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
243 ok = 1; used_asm = 0;
244 for( dy = 0; dy < 4; dy++ )
245 for( dx = 0; dx < 4; dx++ )
247 MC_TEST_LUMA( 16, 16 );
248 MC_TEST_LUMA( 16, 8 );
249 MC_TEST_LUMA( 8, 16 );
250 MC_TEST_LUMA( 8, 8 );
251 MC_TEST_LUMA( 8, 4 );
252 MC_TEST_LUMA( 4, 8 );
253 MC_TEST_LUMA( 4, 4 );
255 report( "mc luma :" );
/* Chroma: dx and dy each run over 0..8 for every listed block size. */
257 ok = 1; used_asm = 0;
258 for( dy = 0; dy < 9; dy++ )
259 for( dx = 0; dx < 9; dx++ )
261 MC_TEST_CHROMA( 8, 8 );
262 MC_TEST_CHROMA( 8, 4 );
263 MC_TEST_CHROMA( 4, 8 );
264 MC_TEST_CHROMA( 4, 4 );
265 MC_TEST_CHROMA( 4, 2 );
266 MC_TEST_CHROMA( 2, 4 );
267 MC_TEST_CHROMA( 2, 2 );
269 report( "mc chroma :" );
271 #undef MC_TEST_CHROMA
/*
 * MC_TEST_AVG( name, ... ): for each of the 10 entries of table `name`, start
 * both output buffers from a copy of buf1, run the C and the tested function
 * against buf2 (extra arguments are forwarded), and compare the full
 * 1024-byte buffers. The loop header itself resets ok and used_asm.
 */
273 #define MC_TEST_AVG( name, ... ) \
274 for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \
276 memcpy( buf3, buf1, 1024 ); \
277 memcpy( buf4, buf1, 1024 ); \
278 if( mc_a.name[i] != mc_ref.name[i] ) \
281 mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
282 mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
283 if( memcmp( buf3, buf4, 1024 ) ) \
286 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
291 report( "mc avg :" );
/*
 * Weighted average: sweep w over -64..128 and stop at the first failing
 * weight (the `&& ok` condition). Because MC_TEST_AVG re-initialises ok and
 * used_asm on every expansion, the values seen by report() are those of the
 * last weight that ran.
 */
292 for( w = -64; w <= 128 && ok; w++ )
293 MC_TEST_AVG( avg_weight, w );
294 report( "mc wpredb :" );
/*
 * check_deblock: compare the deblocking filters selected for `cpu_new`
 * against the plain C implementations over 36 (alpha, beta, tc) settings.
 *
 *   cpu_ref - cpu flags of the trusted baseline; each test is guarded by a
 *             pointer comparison between the cpu_new and cpu_ref tables
 *   cpu_new - cpu flags of the implementation under test
 *
 * Unlike the other checks this one overwrites buf1 with fresh random data
 * before every run. All eight filters share a single "deblock" report line.
 * NOTE(review): the return statement is not visible here; presumably
 * returns `ret` - confirm.
 */
299 static int check_deblock( int cpu_ref, int cpu_new )
301 x264_deblock_function_t db_c;
302 x264_deblock_function_t db_ref;
303 x264_deblock_function_t db_a;
304 int ret = 0, ok = 1, used_asm = 0;
305 int alphas[36], betas[36];
/* Three function tables: C reference, trusted baseline, implementation under test. */
309 x264_deblock_init( 0, &db_c );
310 x264_deblock_init( cpu_ref, &db_ref );
311 x264_deblock_init( cpu_new, &db_a );
313 /* not exactly the real values of a,b,tc but close enough */
315 for( i = 35; i >= 0; i-- )
319 tcs[i][0] = tcs[i][2] = (c+6)/10;
320 tcs[i][1] = tcs[i][3] = (c+9)/20;
/*
 * TEST_DEBLOCK( name, ... ): for each of the 36 parameter sets, regenerate
 * buf1 (full 8-bit range for even i, low 4 bits only for odd i), copy it to
 * both output buffers, run the C and the tested filter starting at row 8,
 * and compare the full 1024-byte buffers. Extra arguments are forwarded
 * after alpha and beta.
 */
325 #define TEST_DEBLOCK( name, ... ) \
326 for( i = 0; i < 36; i++ ) \
328 for( j = 0; j < 1024; j++ ) \
329 /* two distributions of random to exercise different failure modes */\
330 buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
331 memcpy( buf3, buf1, 1024 ); \
332 memcpy( buf4, buf1, 1024 ); \
333 if( db_a.name != db_ref.name ) \
336 db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
337 db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
338 if( memcmp( buf3, buf4, 1024 ) ) \
341 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
347 TEST_DEBLOCK( deblock_h_luma, tcs[i] );
348 TEST_DEBLOCK( deblock_v_luma, tcs[i] );
349 TEST_DEBLOCK( deblock_h_chroma, tcs[i] );
350 TEST_DEBLOCK( deblock_v_chroma, tcs[i] );
351 TEST_DEBLOCK( deblock_h_luma_intra );
352 TEST_DEBLOCK( deblock_v_luma_intra );
353 TEST_DEBLOCK( deblock_h_chroma_intra );
354 TEST_DEBLOCK( deblock_v_chroma_intra );
356 report( "deblock :" );
/*
 * check_quant: compare the quantisation and dequantisation functions
 * selected for `cpu_new` against the plain C implementations, repeated for
 * several scaling-list configurations (i_cqm = 0..3).
 *
 *   cpu_ref - cpu flags of the trusted baseline; each test is guarded by a
 *             pointer comparison between the cpu_new and cpu_ref tables
 *   cpu_new - cpu flags of the implementation under test
 *
 * NOTE(review): the declaration of `h`, the per-iteration bookkeeping of
 * oks[]/used_asms[] and the return statement are not visible here.
 */
361 static int check_quant( int cpu_ref, int cpu_new )
363 x264_quant_function_t qf_c;
364 x264_quant_function_t qf_ref;
365 x264_quant_function_t qf_a;
366 int16_t dct1[64], dct2[64];
368 int ret = 0, ok, used_asm;
369 int oks[2] = {1,1}, used_asms[2] = {0,0};
/* Minimal context for x264_quant_init: point pps at the built-in array and load the default parameters. */
373 h->pps = h->pps_array;
374 x264_param_default( &h->param );
/*
 * One pass per scaling-list setup. Visible setups: the flat list, the JVT
 * lists (i_cqm == 1), and a randomly generated list with entries in
 * 10..255. NOTE(review): the branch conditions for the other setups are not
 * visible - confirm which i_cqm selects which list.
 */
376 for( i_cqm = 0; i_cqm < 4; i_cqm++ )
379 for( i = 0; i < 6; i++ )
380 h->pps->scaling_list[i] = x264_cqm_flat16;
381 else if( i_cqm == 1 )
382 for( i = 0; i < 6; i++ )
383 h->pps->scaling_list[i] = x264_cqm_jvt[i];
387 for( i = 0; i < 64; i++ )
388 cqm_buf[i] = 10 + rand() % 246;
390 for( i = 0; i < 64; i++ )
392 for( i = 0; i < 6; i++ )
393 h->pps->scaling_list[i] = cqm_buf;
/* Initialise the three function tables (C reference, trusted baseline, under test) from the context `h`. */
397 x264_quant_init( h, 0, &qf_c );
398 x264_quant_init( h, cpu_ref, &qf_ref );
399 x264_quant_init( h, cpu_new, &qf_a );
/*
 * TEST_QUANT( name, cqm ): fill dct1 and dct2 with identical random
 * coefficients in -4095..4096, quantise with the C and the tested function
 * using the same arguments, and compare all 64 coefficients.
 */
401 #define TEST_QUANT( name, cqm ) \
402 if( qf_a.name != qf_ref.name ) \
405 for( i = 0; i < 64; i++ ) \
406 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
407 qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \
408 qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \
409 if( memcmp( dct1, dct2, 64*2 ) ) \
412 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
416 TEST_QUANT( quant_8x8_core, *h->quant8_mf[CQM_8IY] );
417 TEST_QUANT( quant_8x8_core, *h->quant8_mf[CQM_8PY] );
418 TEST_QUANT( quant_4x4_core, *h->quant4_mf[CQM_4IY] );
419 TEST_QUANT( quant_4x4_core, *h->quant4_mf[CQM_4PY] );
420 TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] );
421 TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] );
/*
 * TEST_DEQUANT( name, quant, dqm, cqm, shift ): for qp = 51 down to 1,
 * quantise random coefficients with the C `quant` function so the input is
 * realistic, copy the result to dct2, then dequantise with the C and the
 * tested `name` and compare all 64 coefficients.
 * NOTE(review): the loop condition is qp > 0, so qp 0 is never tested -
 * confirm this is intentional.
 */
423 #define TEST_DEQUANT( name, quant, dqm, cqm, shift ) \
424 if( qf_a.name != qf_ref.name ) \
428 for( qp = 51; qp > 0; qp-- ) \
430 for( i = 0; i < 64; i++ ) \
431 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
432 qf_c.quant( (void*)dct1, cqm[qp%6], shift+qp/6, 0 ); \
433 memcpy( dct2, dct1, sizeof(dct2) ); \
434 qf_c.name( (void*)dct1, dqm, qp ); \
435 qf_a.name( (void*)dct2, dqm, qp ); \
436 if( memcmp( dct1, dct2, 64*2 ) ) \
439 fprintf( stderr, #name "(qp=%d, cqm=%d): [FAILED]\n", qp, i_cqm ); \
445 TEST_DEQUANT( dequant_8x8, quant_8x8_core, h->dequant8_mf[CQM_8PY], h->quant8_mf[CQM_8PY], 16 );
446 TEST_DEQUANT( dequant_4x4, quant_4x4_core, h->dequant4_mf[CQM_4PY], h->quant4_mf[CQM_4PY], 15 );
/* oks[] / used_asms[] hold the accumulated results; index 1 feeds the "dequant" report below, index 0 presumably feeds a "quant" report whose call is not visible here. */
449 ok = oks[0]; used_asm = used_asms[0];
452 ok = oks[1]; used_asm = used_asms[1];
453 report( "dequant :" );
/*
 * check_intra: compare the intra-prediction functions selected for `cpu_new`
 * (16x16, 8x8 chroma, 8x8 and 4x4 tables) against the plain C
 * implementations.
 *
 *   cpu_ref - cpu flags of the trusted baseline; each test is guarded by a
 *             pointer comparison between the cpu_new and cpu_ref tables
 *   cpu_new - cpu flags of the implementation under test
 *
 * All modes share a single "intra pred" report line.
 * NOTE(review): the return statement is not visible here; presumably
 * returns `ret` - confirm.
 */
458 static int check_intra( int cpu_ref, int cpu_new )
460 int ret = 0, ok = 1, used_asm = 0;
/* One predictor table per block type; three instances (C reference, trusted baseline, under test). */
464 x264_predict_t predict_16x16[4+3];
465 x264_predict_t predict_8x8c[4+3];
466 x264_predict8x8_t predict_8x8[9+3];
467 x264_predict_t predict_4x4[9+3];
468 } ip_c, ip_ref, ip_a;
/* C reference tables (cpu flags 0). */
470 x264_predict_16x16_init( 0, ip_c.predict_16x16 );
471 x264_predict_8x8c_init( 0, ip_c.predict_8x8c );
472 x264_predict_8x8_init( 0, ip_c.predict_8x8 );
473 x264_predict_4x4_init( 0, ip_c.predict_4x4 );
/* Trusted baseline tables. */
475 x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 );
476 x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c );
477 x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 );
478 x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 );
/* Tables under test. */
480 x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 );
481 x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c );
482 x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 );
483 x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
/*
 * INTRA_TEST( name, dir, ... ): start both output buffers from the same
 * 32x20 region of buf1, run predictor `dir` of table `name` from the C and
 * the tested implementation at offset 48 (row 1, column 16 of the
 * 32-byte-wide buffer), and compare the whole region. Extra arguments are
 * forwarded to the predictor.
 */
485 #define INTRA_TEST( name, dir, ... ) \
486 if( ip_a.name[dir] != ip_ref.name[dir] )\
489 memcpy( buf3, buf1, 32*20 );\
490 memcpy( buf4, buf1, 32*20 );\
491 ip_c.name[dir]( buf3+48, 32, ##__VA_ARGS__ );\
492 ip_a.name[dir]( buf4+48, 32, ##__VA_ARGS__ );\
493 if( memcmp( buf3, buf4, 32*20 ) )\
495 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
500 for( i = 0; i < 12; i++ )
501 INTRA_TEST( predict_4x4, i );
502 for( i = 0; i < 7; i++ )
503 INTRA_TEST( predict_8x8c, i );
504 for( i = 0; i < 7; i++ )
505 INTRA_TEST( predict_16x16, i );
506 for( i = 0; i < 12; i++ )
507 INTRA_TEST( predict_8x8, i, 0xf );
/* Extra 8x8 runs for the V and DC modes with specific combinations of the MB_LEFT / MB_TOP / MB_TOPLEFT / MB_TOPRIGHT flags passed as the extra argument. */
508 INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP );
509 INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP );
510 INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP|MB_TOPLEFT );
511 INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP|MB_TOPLEFT );
512 INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP|MB_TOPRIGHT );
513 INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP|MB_TOPRIGHT );
515 report( "intra pred :" );
/*
 * check_all: run every test group for the implementation selected by
 * `cpu_new`, using `cpu_ref` as the already-trusted baseline.
 *
 *   cpu_ref - cpu flags of the baseline passed through to each group
 *   cpu_new - cpu flags of the implementation under test
 *
 * Returns the sum of the six per-group return values, exactly as the
 * original single-expression form did.
 */
int check_all( int cpu_ref, int cpu_new )
{
    int ret = 0;

    /*
     * One statement per group: the operands of a `+` chain may be evaluated
     * in any order, and every group prints to stderr while some also draw
     * from the shared rand() stream. Sequencing the calls keeps the log
     * order and the per-seed test data identical across compilers.
     */
    ret += check_pixel( cpu_ref, cpu_new );
    ret += check_dct( cpu_ref, cpu_new );
    ret += check_mc( cpu_ref, cpu_new );
    ret += check_intra( cpu_ref, cpu_new );
    ret += check_deblock( cpu_ref, cpu_new );
    ret += check_quant( cpu_ref, cpu_new );

    return ret;
}
/*
 * main: allocate the shared 1024-byte test buffers, fill the inputs with
 * random data, run check_all() for the available instruction sets, and
 * print an overall pass/fail message.
 *
 *   argv[1] (optional) - random seed, so a failing run can be reproduced
 *
 * NOTE(review): the conditions selecting the MMXEXT / AltiVec sections, the
 * srand call and the return statements are not visible here. The buffers
 * are not released in the visible code.
 */
529 int main(int argc, char *argv[])
/* Five 1024-byte buffers; NOTE(review): the allocation results are not checked in the visible code. */
534 buf1 = x264_malloc( 1024 ); /* 32 x 32 */
535 buf2 = x264_malloc( 1024 );
536 buf3 = x264_malloc( 1024 );
537 buf4 = x264_malloc( 1024 );
538 buf5 = x264_malloc( 1024 );
/*
 * Seed: taken from argv[1] when given, otherwise from x264_mdate().
 * NOTE(review): atoi() cannot report malformed input, and `i` is printed
 * with %u - if `i` is declared as a plain int (declaration not visible) the
 * specifier does not match the argument type.
 */
540 i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
541 fprintf( stderr, "x264: using random seed %u\n", i );
/* Random input data in buf1/buf2; output buffers start zeroed. */
544 for( i = 0; i < 1024; i++ )
546 buf1[i] = rand() & 0xFF;
547 buf2[i] = rand() & 0xFF;
548 buf3[i] = buf4[i] = 0;
/* MMX/MMXEXT is validated directly against the C code. */
552 fprintf( stderr, "x264: MMXEXT against C\n" );
553 ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );
/* SSE2 runs only when the CPU reports it; MMX|MMXEXT is the baseline, so only functions that SSE2 actually replaces are exercised. */
555 if( x264_cpu_detect() & X264_CPU_SSE2 )
557 fprintf( stderr, "\nx264: SSE2 against C\n" );
558 ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT,
559 X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 );
/* AltiVec is validated directly against the C code. NOTE(review): this assigns `ret` rather than OR-ing into it; harmless only if this section and the MMXEXT section are never compiled together - confirm. */
563 fprintf( stderr, "x264: ALTIVEC against C\n" );
564 ret = check_all( 0, X264_CPU_ALTIVEC );
/* Final verdict. */
569 fprintf( stderr, "x264: All tests passed Yeah :)\n" );
572 fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );