4 #include "common/common.h"
5 #include "common/cpu.h"
7 #include "common/i386/pixel.h"
8 #include "common/i386/dct.h"
9 #include "common/i386/mc.h"
12 #include "common/ppc/pixel.h"
13 #include "common/ppc/mc.h"
/* Scratch buffers shared by every check_* function (allocated in main, 1 KB each). */
16 /* buf1, buf2: initialised to random data and shouldn't write into them */
17 uint8_t * buf1, * buf2;
18 /* buf3, buf4: used to store output */
19 uint8_t * buf3, * buf4;
/* report: prints one " - name [OK/FAILED]" line based on `ok`, which the
 * preceding TEST_* loop must have set.  NOTE(review): part of the macro body
 * (the lines between 23 and 25) is elided in this excerpt. */
23 #define report( name ) { \
25 fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \
/* check_pixel: compares the C pixel metrics (sad/ssd/satd families, batched
 * SAD, intra-prediction SATD/SA8D, ssim, esa ads) against the versions
 * selected by cpu_new.  A function is only exercised when its cpu_new pointer
 * differs from the cpu_ref one, so each CPU pass tests only newly-added asm.
 * Returns 0 if everything matched, nonzero otherwise.
 * NOTE(review): this excerpt has elided lines (braces and declarations such
 * as i/j/res_c/res_asm/sums/dc are not all visible here). */
29 static int check_pixel( int cpu_ref, int cpu_new )
31 x264_pixel_function_t pixel_c;
32 x264_pixel_function_t pixel_ref;
33 x264_pixel_function_t pixel_asm;
/* intra predictor tables; array sizes mirror the corresponding init functions */
34 x264_predict_t predict_16x16[4+3];
35 x264_predict_t predict_8x8c[4+3];
36 x264_predict_t predict_4x4[9+3];
37 x264_predict8x8_t predict_8x8[9+3];
/* filtered neighbour samples for the 8x8 intra tests */
38 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
39 int ret = 0, ok, used_asm;
42 x264_pixel_init( 0, &pixel_c );
43 x264_pixel_init( cpu_ref, &pixel_ref );
44 x264_pixel_init( cpu_new, &pixel_asm );
45 x264_predict_16x16_init( 0, predict_16x16 );
46 x264_predict_8x8c_init( 0, predict_8x8c );
47 x264_predict_8x8_init( 0, predict_8x8 );
48 x264_predict_4x4_init( 0, predict_4x4 );
/* build edge[] once from buf2; used by the intra_sa8d_x3_8x8 test below */
49 x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* TEST_PIXEL: for each of the 7 partition sizes, run the C and asm metric on
 * the same (buf1, buf2) pair and compare the scalar results */
51 #define TEST_PIXEL( name ) \
52 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
55 if( pixel_asm.name[i] != pixel_ref.name[i] ) \
58 res_c = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
59 res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
60 if( res_c != res_asm ) \
63 fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
67 report( "pixel " #name " :" );
/* TEST_PIXEL_X: checks batched sad_x3/sad_x4 against N separate C sad calls
 * on the same reference offsets.  NOTE(review): the N==3 / N==4 branch
 * selection lines are elided in this excerpt. */
74 #define TEST_PIXEL_X( N ) \
75 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
77 int res_c[4]={0}, res_asm[4]={0}; \
78 if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
81 res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
82 res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
83 res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
86 res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
87 pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
90 pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
91 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
94 fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
95 i, res_c[0], res_c[1], res_c[2], res_c[3], \
96 res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
100 report( "pixel sad_x"#N" :" );
/* TEST_INTRA_SATD: runs each intra predictor through the C satd/sa8d metric
 * and compares against the combined intra_*_x3 asm function; i8x8 selects
 * whether the asm variant takes the filtered edge[] or raw pixels */
105 #define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \
106 if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
108 int res_c[3], res_asm[3]; \
110 memcpy( buf3, buf2, 1024 ); \
111 for( i=0; i<3; i++ ) \
113 pred[i]( buf3+40, ##__VA_ARGS__ ); \
114 res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
116 pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
117 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
120 fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
121 res_c[0], res_c[1], res_c[2], \
122 res_asm[0], res_asm[1], res_asm[2] ); \
126 ok = 1; used_asm = 0;
127 TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );
128 TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );
129 TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );
130 TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );
131 report( "intra satd_x3 :" );
/* ssim: compared with a small float tolerance rather than memcmp.
 * NOTE(review): res_c/res_a here are declared in elided lines; presumably
 * floating-point, given the fabs() comparison -- confirm. */
133 if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||
134 pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
138 x264_cpu_restore( cpu_new );
139 res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
140 res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
141 if( fabs(res_c - res_a) > 1e-8 )
144 fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
/* esa ads: feed random 14-bit sums/dc values and compare the output arrays */
149 ok = 1; used_asm = 0;
151 if( pixel_asm.ads[i] != pixel_ref.ads[i] )
153 uint16_t res_a[32], res_c[32];
156 for( j=0; j<72; j++ )
157 sums[j] = rand() & 0x3fff;
159 dc[j] = rand() & 0x3fff;
161 pixel_c.ads[i]( dc, sums, 32, res_c, 32 );
162 pixel_asm.ads[i]( dc, sums, 32, res_a, 32 );
163 if( memcmp(res_a, res_c, sizeof(res_c)) )
166 report( "esa ads:" );
/* check_dct: validates the asm (i)DCT implementations against the C
 * reference.  Forward/inverse 4x4 and 8x8 transforms are compared on random
 * pixel data from buf1/buf2; the DC transforms use fixed coefficient blocks.
 * Returns 0 on success, nonzero on any mismatch.
 * Fix(review): the idct4x4dc test was guarded by the dct4x4dc pointer
 * comparison (copy/paste slip -- compare the correctly paired idct2x2dc
 * guard below), so the idct4x4dc asm could be silently skipped, or the test
 * run redundantly, depending on which of the two functions the asm actually
 * provides.  The guard now checks the idct4x4dc pointers it exercises.
 * NOTE(review): this excerpt has elided lines (braces, buf5 declaration and
 * used_asm updates are not all visible). */
171 static int check_dct( int cpu_ref, int cpu_new )
173 x264_dct_function_t dct_c;
174 x264_dct_function_t dct_ref;
175 x264_dct_function_t dct_asm;
176 int ret = 0, ok, used_asm;
/* 16 4x4 coefficient blocks -- large enough for a 16x16 sub_dct */
177 int16_t dct1[16][4][4] __attribute__((aligned(16)));
178 int16_t dct2[16][4][4] __attribute__((aligned(16)));
180 x264_dct_init( 0, &dct_c );
181 x264_dct_init( cpu_ref, &dct_ref);
182 x264_dct_init( cpu_new, &dct_asm );
/* TEST_DCT: run the C and asm forward transform on identical inputs and
 * memcmp `size` bytes of coefficients */
183 #define TEST_DCT( name, t1, t2, size ) \
184 if( dct_asm.name != dct_ref.name ) \
187 dct_c.name( t1, buf1, buf2 ); \
188 dct_asm.name( t2, buf1, buf2 ); \
189 if( memcmp( t1, t2, size ) ) \
192 fprintf( stderr, #name " [FAILED]\n" ); \
195 ok = 1; used_asm = 0;
196 TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
197 TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
198 TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
199 report( "sub_dct4 :" );
201 ok = 1; used_asm = 0;
202 TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
203 TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
204 report( "sub_dct8 :" );
207 /* copy coefs because idct8 modifies them in place */
208 memcpy( buf5, dct1, 512 );
/* TEST_IDCT: restore pixels and coefficients before every run, then apply
 * the inverse transform into buf3 (C) and buf4 (asm) and compare pixels */
210 #define TEST_IDCT( name ) \
211 if( dct_asm.name != dct_ref.name ) \
214 memcpy( buf3, buf1, 32*32 ); \
215 memcpy( buf4, buf1, 32*32 ); \
216 memcpy( dct1, buf5, 512 ); \
217 memcpy( dct2, buf5, 512 ); \
218 dct_c.name( buf3, (void*)dct1 ); \
219 dct_asm.name( buf4, (void*)dct2 ); \
220 if( memcmp( buf3, buf4, 32*32 ) ) \
223 fprintf( stderr, #name " [FAILED]\n" ); \
226 ok = 1; used_asm = 0;
227 TEST_IDCT( add4x4_idct );
228 TEST_IDCT( add8x8_idct );
229 TEST_IDCT( add16x16_idct );
230 report( "add_idct4 :" );
232 ok = 1; used_asm = 0;
233 TEST_IDCT( add8x8_idct8 );
234 TEST_IDCT( add16x16_idct8 );
235 report( "add_idct8 :" );
/* DC transforms: fixed inputs; locals intentionally shadow the outer dct1/dct2 */
238 ok = 1; used_asm = 0;
239 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
241 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
242 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
244 dct_c.dct4x4dc( dct1 );
245 dct_asm.dct4x4dc( dct2 );
246 if( memcmp( dct1, dct2, 32 ) )
249 fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
/* was: dct_asm.dct4x4dc != dct_ref.dct4x4dc -- wrong guard for this test */
252 if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
254 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
255 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
257 dct_c.idct4x4dc( dct1 );
258 dct_asm.idct4x4dc( dct2 );
259 if( memcmp( dct1, dct2, 32 ) )
262 fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
265 report( "(i)dct4x4dc :" );
267 ok = 1; used_asm = 0;
268 if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
270 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
271 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
273 dct_c.dct2x2dc( dct1 );
274 dct_asm.dct2x2dc( dct2 );
275 if( memcmp( dct1, dct2, 4*2 ) )
278 fprintf( stderr, " - dct2x2dc : [FAILED]\n" );
281 if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
283 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
284 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
286 dct_c.idct2x2dc( dct1 );
287 dct_asm.idct2x2dc( dct2 );
288 if( memcmp( dct1, dct2, 4*2 ) )
291 fprintf( stderr, " - idct2x2dc : [FAILED]\n" );
294 report( "(i)dct2x2dc :" );
/* check_mc: validates motion compensation (luma/chroma subpel interpolation,
 * get_ref, pixel averaging and weighted prediction) against the C versions.
 * Returns 0 on success, nonzero on any mismatch.
 * NOTE(review): this excerpt has elided lines (braces and declarations such
 * as i/j/dx/dy/w are not all visible). */
301 static int check_mc( int cpu_ref, int cpu_new )
302 x264_mc_functions_t mc_c;
303 x264_mc_functions_t mc_ref;
304 x264_mc_functions_t mc_a;
/* C-only pixel functions, used as an independent comparator for get_ref */
306 x264_pixel_function_t pixel;
/* source pointers offset into buf1 so negative mv components stay in bounds */
307 uint8_t *src = &buf1[2*32+2];
308 uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2],
309 &buf1[12*32+2], &buf1[17*32+2] };
310 uint8_t *dst1 = &buf3[2*32+2];
311 uint8_t *dst2 = &buf4[2*32+2];
313 int ret = 0, ok, used_asm;
315 x264_mc_init( 0, &mc_c );
316 x264_mc_init( cpu_ref, &mc_ref );
317 x264_mc_init( cpu_new, &mc_a );
318 x264_pixel_init( 0, &pixel );
/* MC_TEST_LUMA: run C and asm mc_luma into 0xCD-poisoned buffers and memcmp
 * the whole kilobyte (catches out-of-block writes); get_ref is checked by a
 * zero SAD against the C mc_luma output, since it may return a pointer into
 * its own reference rather than copying */
320 #define MC_TEST_LUMA( w, h ) \
321 if( mc_a.mc_luma != mc_ref.mc_luma ) \
324 memset(buf3, 0xCD, 1024); \
325 memset(buf4, 0xCD, 1024); \
326 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
327 mc_a.mc_luma( dst2, 16, src2, 32, dx, dy, w, h ); \
328 if( memcmp( buf3, buf4, 1024 ) ) \
330 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
334 if( mc_a.get_ref != mc_ref.get_ref ) \
336 uint8_t *ref = dst2; \
337 int ref_stride = 16; \
339 memset(buf3, 0xCD, 1024); \
340 memset(buf4, 0xCD, 1024); \
341 mc_c.mc_luma( dst1, 16, src2, 32, dx, dy, w, h ); \
342 ref = mc_a.get_ref( ref, &ref_stride, src2, 32, dx, dy, w, h ); \
343 if( pixel.sad[PIXEL_##w##x##h]( dst1, 16, ref, ref_stride ) ) \
345 fprintf( stderr, "get_ref[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
350 #define MC_TEST_CHROMA( w, h ) \
351 if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
354 memset(buf3, 0xCD, 1024); \
355 memset(buf4, 0xCD, 1024); \
356 mc_c.mc_chroma( dst1, 16, src, 32, dx, dy, w, h ); \
357 mc_a.mc_chroma( dst2, 16, src, 32, dx, dy, w, h ); \
358 /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
359 for( j=0; j<h; j++ ) \
360 for( i=w; i<4; i++ ) \
361 dst2[i+j*16] = dst1[i+j*16]; \
362 if( memcmp( buf3, buf4, 1024 ) ) \
364 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
368 ok = 1; used_asm = 0;
/* exercise every mv in [-8,8) for each luma partition size */
369 for( dy = -8; dy < 8; dy++ )
370 for( dx = -8; dx < 8; dx++ )
372 MC_TEST_LUMA( 16, 16 );
373 MC_TEST_LUMA( 16, 8 );
374 MC_TEST_LUMA( 8, 16 );
375 MC_TEST_LUMA( 8, 8 );
376 MC_TEST_LUMA( 8, 4 );
377 MC_TEST_LUMA( 4, 8 );
378 MC_TEST_LUMA( 4, 4 );
380 report( "mc luma :" );
/* chroma mvs: [-1,9) includes the negative and over-range edge cases */
382 ok = 1; used_asm = 0;
383 for( dy = -1; dy < 9; dy++ )
384 for( dx = -1; dx < 9; dx++ )
386 MC_TEST_CHROMA( 8, 8 );
387 MC_TEST_CHROMA( 8, 4 );
388 MC_TEST_CHROMA( 4, 8 );
389 MC_TEST_CHROMA( 4, 4 );
390 MC_TEST_CHROMA( 4, 2 );
391 MC_TEST_CHROMA( 2, 4 );
392 MC_TEST_CHROMA( 2, 2 );
394 report( "mc chroma :" );
396 #undef MC_TEST_CHROMA
/* MC_TEST_AVG: pixel averaging / weighted prediction over the 10 partition
 * sizes; extra arguments (the weight) are forwarded via __VA_ARGS__ */
398 #define MC_TEST_AVG( name, ... ) \
399 for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \
401 memcpy( buf3, buf1, 1024 ); \
402 memcpy( buf4, buf1, 1024 ); \
403 if( mc_a.name[i] != mc_ref.name[i] ) \
406 mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
407 mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
408 if( memcmp( buf3, buf4, 1024 ) ) \
411 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
416 report( "mc avg :" );
/* weight range deliberately covers negative and >64 values to catch clipping */
417 ok = 1; used_asm = 0;
418 for( w = -64; w <= 128 && ok; w++ )
419 MC_TEST_AVG( avg_weight, w );
420 report( "mc wpredb :" );
/* check_deblock: compares C vs asm deblocking filters (luma/chroma,
 * horizontal/vertical, normal and intra variants) across 36 (alpha, beta)
 * strength levels with two different input distributions.  Returns 0 on
 * success.  NOTE(review): the tcs declaration, the alpha/beta fill and
 * some braces are elided in this excerpt. */
427 static int check_deblock( int cpu_ref, int cpu_new )
428 x264_deblock_function_t db_c;
429 x264_deblock_function_t db_ref;
430 x264_deblock_function_t db_a;
431 int ret = 0, ok = 1, used_asm = 0;
432 int alphas[36], betas[36];
435 x264_deblock_init( 0, &db_c );
436 x264_deblock_init( cpu_ref, &db_ref );
437 x264_deblock_init( cpu_new, &db_a );
439 /* not exactly the real values of a,b,tc but close enough */
441 for( i = 35; i >= 0; i-- )
445 tcs[i][0] = tcs[i][2] = (c+6)/10;
446 tcs[i][1] = tcs[i][3] = (c+9)/20;
/* TEST_DEBLOCK: refill buf1 with fresh random pixels for every strength
 * level, filter the row at offset 8*32 in both copies, and memcmp the whole
 * kilobyte so stray writes outside the filtered row are caught too */
451 #define TEST_DEBLOCK( name, ... ) \
452 for( i = 0; i < 36; i++ ) \
454 for( j = 0; j < 1024; j++ ) \
455 /* two distributions of random to exercise different failure modes */\
456 buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
457 memcpy( buf3, buf1, 1024 ); \
458 memcpy( buf4, buf1, 1024 ); \
459 if( db_a.name != db_ref.name ) \
462 db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
463 db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
464 if( memcmp( buf3, buf4, 1024 ) ) \
467 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
473 TEST_DEBLOCK( deblock_h_luma, tcs[i] );
474 TEST_DEBLOCK( deblock_v_luma, tcs[i] );
475 TEST_DEBLOCK( deblock_h_chroma, tcs[i] );
476 TEST_DEBLOCK( deblock_v_chroma, tcs[i] );
477 TEST_DEBLOCK( deblock_h_luma_intra );
478 TEST_DEBLOCK( deblock_v_luma_intra );
479 TEST_DEBLOCK( deblock_h_chroma_intra );
480 TEST_DEBLOCK( deblock_v_chroma_intra );
482 report( "deblock :" );
/* check_quant: compares C vs asm quantisation/dequantisation under four CQM
 * configurations (flat, JVT, and random custom matrices).  Quant and dequant
 * outcomes are accumulated separately in oks[]/used_asms[] so each gets its
 * own report line.  Returns 0 on success.
 * NOTE(review): the x264_t *h setup, the i_cqm==0 branch header, CQM init
 * calls and several braces are elided in this excerpt. */
489 static int check_quant( int cpu_ref, int cpu_new )
490 x264_quant_function_t qf_c;
491 x264_quant_function_t qf_ref;
492 x264_quant_function_t qf_a;
493 int16_t dct1[64] __attribute__((__aligned__(16)));
494 int16_t dct2[64] __attribute__((__aligned__(16)));
495 uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
496 int ret = 0, ok, used_asm;
/* index 0 accumulates quant results, index 1 dequant results */
497 int oks[2] = {1,1}, used_asms[2] = {0,0};
/* NOTE(review): h is declared/allocated in elided lines -- confirm its setup */
500 h->pps = h->pps_array;
501 x264_param_default( &h->param );
502 h->param.rc.i_qp_min = 26;
/* iterate over the four scaling-matrix setups */
504 for( i_cqm = 0; i_cqm < 4; i_cqm++ )
507 for( i = 0; i < 6; i++ )
508 h->pps->scaling_list[i] = x264_cqm_flat16;
509 else if( i_cqm == 1 )
510 for( i = 0; i < 6; i++ )
511 h->pps->scaling_list[i] = x264_cqm_jvt[i];
/* otherwise: random custom matrix with entries kept in [10,255] */
515 for( i = 0; i < 64; i++ )
516 cqm_buf[i] = 10 + rand() % 246;
518 for( i = 0; i < 64; i++ )
520 for( i = 0; i < 6; i++ )
521 h->pps->scaling_list[i] = cqm_buf;
/* rebuild the quant tables for this CQM, then init all three function sets */
525 x264_quant_init( h, 0, &qf_c );
526 x264_quant_init( h, cpu_ref, &qf_ref );
527 x264_quant_init( h, cpu_new, &qf_a );
/* INIT_QUANT8/INIT_QUANT4: fill dct1==dct2 with random coefficients scaled
 * to the maximum range the forward DCT can produce at each position */
529 #define INIT_QUANT8() \
531 static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \
533 for( y = 0; y < 8; y++ ) \
534 for( x = 0; x < 8; x++ ) \
536 unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \
537 dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \
541 #define INIT_QUANT4() \
543 static const int scale1d[4] = {4,6,4,6}; \
545 for( y = 0; y < 4; y++ ) \
546 for( x = 0; x < 4; x++ ) \
548 unsigned int scale = 255*scale1d[y]*scale1d[x]; \
549 dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \
553 #define TEST_QUANT_DC( name, cqm ) \
554 if( qf_a.name != qf_ref.name ) \
557 for( qp = 51; qp > 0; qp-- ) \
559 for( i = 0; i < 16; i++ ) \
560 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
561 qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
562 qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
563 if( memcmp( dct1, dct2, 16*2 ) ) \
566 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
572 #define TEST_QUANT( qname, block, w ) \
573 if( qf_a.qname != qf_ref.qname ) \
576 for( qp = 51; qp > 0; qp-- ) \
579 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
580 qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
581 if( memcmp( dct1, dct2, w*w*2 ) ) \
584 fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
590 TEST_QUANT( quant_8x8, CQM_8IY, 8 );
591 TEST_QUANT( quant_8x8, CQM_8PY, 8 );
592 TEST_QUANT( quant_4x4, CQM_4IY, 4 );
593 TEST_QUANT( quant_4x4, CQM_4PY, 4 );
594 TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] );
595 TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );
/* TEST_DEQUANT: quantise with the C path, then dequantise with both C and
 * asm and compare -- exercises dequant on realistic quantised input */
597 #define TEST_DEQUANT( qname, dqname, block, w ) \
598 if( qf_a.dqname != qf_ref.dqname ) \
601 for( qp = 51; qp > 0; qp-- ) \
604 qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
605 memcpy( dct2, dct1, w*w*2 ); \
606 qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
607 qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
608 if( memcmp( dct1, dct2, w*w*2 ) ) \
611 fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
617 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 );
618 TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 );
619 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
620 TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
/* report quant and dequant outcomes separately */
623 ok = oks[0]; used_asm = used_asms[0];
626 ok = oks[1]; used_asm = used_asms[1];
627 report( "dequant :" );
/* check_intra: compares C vs asm intra predictors for every block size and
 * prediction mode.  On mismatch the test macro dumps edge[] and both outputs
 * for debugging.  Returns 0 on success.
 * NOTE(review): braces, the struct header for ip_c/ip_ref/ip_a and the
 * i/j/k declarations are partly elided in this excerpt. */
634 static int check_intra( int cpu_ref, int cpu_new )
636 int ret = 0, ok = 1, used_asm = 0;
638 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
/* one predictor table per implementation under test (C, reference, asm) */
639 x264_predict_t predict_16x16[4+3];
640 x264_predict_t predict_8x8c[4+3];
641 x264_predict8x8_t predict_8x8[9+3];
642 x264_predict_t predict_4x4[9+3];
643 } ip_c, ip_ref, ip_a;
645 x264_predict_16x16_init( 0, ip_c.predict_16x16 );
646 x264_predict_8x8c_init( 0, ip_c.predict_8x8c );
647 x264_predict_8x8_init( 0, ip_c.predict_8x8 );
648 x264_predict_4x4_init( 0, ip_c.predict_4x4 );
650 x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 );
651 x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c );
652 x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 );
653 x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 );
655 x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 );
656 x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c );
657 x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 );
658 x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
/* build the filtered edge context from buf1 for the 8x8 predictors */
660 x264_predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* INTRA_TEST: predict into copies of buf1 at offset 48 with C and asm, then
 * memcmp the surrounding 32*20 region so out-of-block writes are caught too;
 * the printf block dumps context when a mode fails */
662 #define INTRA_TEST( name, dir, ... ) \
663 if( ip_a.name[dir] != ip_ref.name[dir] )\
666 memcpy( buf3, buf1, 32*20 );\
667 memcpy( buf4, buf1, 32*20 );\
668 ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
669 ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
670 if( memcmp( buf3, buf4, 32*20 ) )\
672 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
675 for(k=-1; k<16; k++)\
676 printf("%2x ", edge[16+k]);\
679 printf("%2x ", edge[j]);\
681 printf("%2x ", buf4[48+k+j*32]);\
688 printf("%2x ", buf3[48+k+j*32]);\
694 for( i = 0; i < 12; i++ )
695 INTRA_TEST( predict_4x4, i );
696 for( i = 0; i < 7; i++ )
697 INTRA_TEST( predict_8x8c, i );
698 for( i = 0; i < 7; i++ )
699 INTRA_TEST( predict_16x16, i );
700 for( i = 0; i < 12; i++ )
701 INTRA_TEST( predict_8x8, i, edge );
703 report( "intra pred :" );
/* check_all: runs every unit-test group for the given CPU flag pair and
 * returns the sum of their return codes (0 iff all groups passed). */
707 int check_all( int cpu_ref, int cpu_new )
709 return check_pixel( cpu_ref, cpu_new )
710 + check_dct( cpu_ref, cpu_new )
711 + check_mc( cpu_ref, cpu_new )
712 + check_intra( cpu_ref, cpu_new )
713 + check_deblock( cpu_ref, cpu_new )
714 + check_quant( cpu_ref, cpu_new );
/* main: allocates the shared buffers, seeds the RNG (from argv[1] or the
 * clock), fills buf1/buf2 with random pixels, then runs check_all for each
 * instruction-set level the host supports, comparing each new level against
 * the previously tested one.
 * NOTE(review): srand(), the architecture #ifdef guards, the cpu0 update
 * between passes and the return statement are elided in this excerpt. */
717 int main(int argc, char *argv[])
720 int cpu0 = 0, cpu1 = 0;
/* five 1 KB scratch buffers shared by all tests */
723 buf1 = x264_malloc( 1024 ); /* 32 x 32 */
724 buf2 = x264_malloc( 1024 );
725 buf3 = x264_malloc( 1024 );
726 buf4 = x264_malloc( 1024 );
727 buf5 = x264_malloc( 1024 );
/* NOTE(review): the seed is printed with %u while i is presumably int --
 * benign for typical values, but confirm i's declared type (elided here) */
729 i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
730 fprintf( stderr, "x264: using random seed %u\n", i );
733 for( i = 0; i < 1024; i++ )
735 buf1[i] = rand() & 0xFF;
736 buf2[i] = rand() & 0xFF;
737 buf3[i] = buf4[i] = 0;
/* x86 path: test MMXEXT first, then fold in SSE2/SSSE3 incrementally so each
 * pass only re-checks functions that changed from the previous level */
741 fprintf( stderr, "x264: MMXEXT against C\n" );
742 cpu1 = X264_CPU_MMX | X264_CPU_MMXEXT;
743 ret = check_all( 0, cpu1 );
745 if( x264_cpu_detect() & X264_CPU_SSE2 )
747 fprintf( stderr, "\nx264: SSE2 against C\n" );
749 cpu1 |= X264_CPU_SSE | X264_CPU_SSE2;
750 ret |= check_all( cpu0, cpu1 );
752 if( x264_cpu_detect() & X264_CPU_SSSE3 )
754 fprintf( stderr, "\nx264: SSSE3 against C\n" );
756 cpu1 |= X264_CPU_SSE3 | X264_CPU_SSSE3;
757 ret |= check_all( cpu0, cpu1 );
/* PPC path: AltiVec is tested directly against plain C */
761 if( x264_cpu_detect() & X264_CPU_ALTIVEC )
763 fprintf( stderr, "x264: ALTIVEC against C\n" );
764 ret = check_all( 0, X264_CPU_ALTIVEC );
/* final verdict */
770 fprintf( stderr, "x264: All tests passed Yeah :)\n" );
773 fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );