5 #include "common/common.h"
6 #include "common/cpu.h"
8 #include "common/i386/pixel.h"
9 #include "common/i386/dct.h"
10 #include "common/i386/mc.h"
13 #include "common/ppc/pixel.h"
14 #include "common/ppc/mc.h"
17 /* buf1, buf2: initialised to random data; tests should not write into them */
18 uint8_t * buf1, * buf2;
19 /* buf3, buf4: used to store output */
20 uint8_t * buf3, * buf4;
/* report( name ): prints one status line for a test group to stderr, showing
 * `name` followed by "OK" or "FAILED" according to the caller's `ok` flag.
 * NOTE(review): only part of the macro body is visible in this listing. */
24 #define report( name ) { \
26 fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \
/* check_pixel: compares the pixel-metric functions selected for cpu_new with
 * the plain C versions (tables initialised with cpu flags 0).
 *
 * cpu_ref: CPU flags for the reference table; its function pointers are
 *          compared with the cpu_new table in the guards of each test.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the return statement is not visible in this listing; `ret`
 * is presumably the failure indicator handed back to check_all. */
30 static int check_pixel( int cpu_ref, int cpu_new )
32 x264_pixel_function_t pixel_c;
33 x264_pixel_function_t pixel_ref;
34 x264_pixel_function_t pixel_asm;
35 x264_predict_t predict_16x16[4+3];
36 x264_predict_t predict_8x8c[4+3];
37 x264_predict_t predict_4x4[9+3];
38 x264_predict8x8_t predict_8x8[9+3];
39 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
40 int ret = 0, ok, used_asm;
/* pixel_c is the C table (cpu = 0); pixel_ref and pixel_asm are built for
 * cpu_ref and cpu_new respectively. */
43 x264_pixel_init( 0, &pixel_c );
44 x264_pixel_init( cpu_ref, &pixel_ref );
45 x264_pixel_init( cpu_new, &pixel_asm );
/* The intra predictors used by TEST_INTRA_SATD are the C ones (cpu = 0). */
46 x264_predict_16x16_init( 0, predict_16x16 );
47 x264_predict_8x8c_init( 0, predict_8x8c );
48 x264_predict_8x8_init( 0, predict_8x8 );
49 x264_predict_4x4_init( 0, predict_4x4 );
/* Fill `edge` from buf2+40; it is handed to predict_8x8 and to
 * intra_sa8d_x3_8x8 in the last TEST_INTRA_SATD call. */
50 x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* TEST_PIXEL( name ): loops over 7 entries of the function table `name`.
 * The guard compares the cpu_new and cpu_ref pointers; the C and cpu_new
 * functions are then called as name[i]( buf1, 32, buf2, 24 ) and their
 * integer results compared, printing both values on mismatch. */
52 #define TEST_PIXEL( name ) \
53 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
56 if( pixel_asm.name[i] != pixel_ref.name[i] ) \
59 res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
60 res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
61 if( res_c != res_asm ) \
64 fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
68 report( "pixel " #name " :" );
/* TEST_PIXEL_X( N ): checks the multi-candidate SAD functions sad_x3 / sad_x4.
 * The expected values come from separate C sad calls against buf2, buf2+30,
 * buf2+1 and buf2+99; the cpu_new function receives the same candidates in a
 * single call and writes into res_asm. Both arrays start zeroed and are
 * compared with memcmp over all four slots.
 * NOTE(review): the lines selecting between the x3 and x4 paths are not
 * visible in this listing. */
75 #define TEST_PIXEL_X( N ) \
76 for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
78 int res_c[4]={0}, res_asm[4]={0}; \
79 if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
82 res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
83 res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
84 res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
87 res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
88 pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
91 pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
92 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
95 fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
96 i, res_c[0], res_c[1], res_c[2], res_c[3], \
97 res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
101 report( "pixel sad_x"#N" :" );
/* TEST_INTRA_SATD( name, pred, satd, i8x8, ... ): guarded by pixel_asm.name
 * being non-NULL and different from pixel_ref.name. The three reference
 * costs are produced by running pred[0..2] on buf3+40 (buf3 is first reset
 * from buf2) and measuring each prediction against buf1+40 with the C `satd`
 * metric. pixel_asm.name writes its three costs into res_asm in one call; it
 * is given `edge` when i8x8 is non-zero and buf3+40 otherwise. */
106 #define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \
107 if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
109 int res_c[3], res_asm[3]; \
111 memcpy( buf3, buf2, 1024 ); \
112 for( i=0; i<3; i++ ) \
114 pred[i]( buf3+40, ##__VA_ARGS__ ); \
115 res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \
117 pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \
118 if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
121 fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
122 res_c[0], res_c[1], res_c[2], \
123 res_asm[0], res_asm[1], res_asm[2] ); \
127 ok = 1; used_asm = 0;
128 TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );
129 TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );
130 TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );
131 TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );
132 report( "intra satd_x3 :" );
/* check_dct: compares the forward / inverse transform functions selected for
 * cpu_new with the plain C versions (table initialised with cpu flags 0).
 *
 * cpu_ref: CPU flags for the reference table; each test is guarded by a
 *          comparison of the cpu_new pointer against the cpu_ref pointer.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the return statement is not visible in this listing; `ret`
 * is presumably the failure indicator handed back to check_all. */
137 static int check_dct( int cpu_ref, int cpu_new )
139 x264_dct_function_t dct_c;
140 x264_dct_function_t dct_ref;
141 x264_dct_function_t dct_asm;
142 int ret = 0, ok, used_asm;
143 int16_t dct1[16][4][4] __attribute((aligned(16)));
144 int16_t dct2[16][4][4] __attribute((aligned(16)));
/* dct_c is the C table (cpu = 0); dct_ref and dct_asm are built for cpu_ref
 * and cpu_new respectively. */
146 x264_dct_init( 0, &dct_c );
147 x264_dct_init( cpu_ref, &dct_ref);
148 x264_dct_init( cpu_new, &dct_asm );
/* TEST_DCT( name, t1, t2, size ): runs the C and cpu_new forward transform
 * `name` on (buf1, buf2), writing coefficients to t1 and t2, and compares the
 * first `size` bytes of the two outputs. */
149 #define TEST_DCT( name, t1, t2, size ) \
150 if( dct_asm.name != dct_ref.name ) \
153 dct_c.name( t1, buf1, buf2 ); \
154 dct_asm.name( t2, buf1, buf2 ); \
155 if( memcmp( t1, t2, size ) ) \
158 fprintf( stderr, #name " [FAILED]\n" ); \
161 ok = 1; used_asm = 0;
162 TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
163 TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
164 TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
165 report( "sub_dct4 :" );
167 ok = 1; used_asm = 0;
168 TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
169 TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
170 report( "sub_dct8 :" );
173 /* copy coefs because idct8 modifies them in place */
174 memcpy( buf5, dct1, 512 );
/* TEST_IDCT( name ): starts buf3 and buf4 from the same pixels (buf1) and
 * dct1 / dct2 from the same saved coefficients (buf5), applies the C and
 * cpu_new inverse transform `name`, then compares the two 32*32-byte
 * outputs. */
176 #define TEST_IDCT( name ) \
177 if( dct_asm.name != dct_ref.name ) \
180 memcpy( buf3, buf1, 32*32 ); \
181 memcpy( buf4, buf1, 32*32 ); \
182 memcpy( dct1, buf5, 512 ); \
183 memcpy( dct2, buf5, 512 ); \
184 dct_c.name( buf3, (void*)dct1 ); \
185 dct_asm.name( buf4, (void*)dct2 ); \
186 if( memcmp( buf3, buf4, 32*32 ) ) \
189 fprintf( stderr, #name " [FAILED]\n" ); \
192 ok = 1; used_asm = 0;
193 TEST_IDCT( add4x4_idct );
194 TEST_IDCT( add8x8_idct );
195 TEST_IDCT( add16x16_idct );
196 report( "add_idct4 :" );
198 ok = 1; used_asm = 0;
199 TEST_IDCT( add8x8_idct8 );
200 TEST_IDCT( add16x16_idct8 );
201 report( "add_idct8 :" );
/* 4x4 DC transforms: each test runs in place on its own pair of identical,
 * fixed coefficient arrays and compares the 32 resulting bytes. */
204 ok = 1; used_asm = 0;
205 if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )
207 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
208 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
210 dct_c.dct4x4dc( dct1 );
211 dct_asm.dct4x4dc( dct2 );
212 if( memcmp( dct1, dct2, 32 ) )
215 fprintf( stderr, " - dct4x4dc : [FAILED]\n" );
/* Guard on the idct4x4dc pointers, i.e. the function this test actually
 * calls, in the same way the idct2x2dc test below guards on idct2x2dc. */
218 if( dct_asm.idct4x4dc != dct_ref.idct4x4dc )
220 int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
221 int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};
223 dct_c.idct4x4dc( dct1 );
224 dct_asm.idct4x4dc( dct2 );
225 if( memcmp( dct1, dct2, 32 ) )
228 fprintf( stderr, " - idct4x4dc : [FAILED]\n" );
231 report( "(i)dct4x4dc :" );
/* 2x2 DC transforms: same scheme on fixed 2x2 arrays, comparing 4*2 bytes. */
233 ok = 1; used_asm = 0;
234 if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )
236 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
237 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
239 dct_c.dct2x2dc( dct1 );
240 dct_asm.dct2x2dc( dct2 );
241 if( memcmp( dct1, dct2, 4*2 ) )
244 fprintf( stderr, " - dct2x2dc : [FAILED]\n" );
247 if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )
249 int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
250 int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};
252 dct_c.idct2x2dc( dct1 );
253 dct_asm.idct2x2dc( dct2 );
254 if( memcmp( dct1, dct2, 4*2 ) )
257 fprintf( stderr, " - idct2x2dc : [FAILED]\n" );
260 report( "(i)dct2x2dc :" );
/* check_mc: compares the motion-compensation functions selected for cpu_new
 * with the plain C versions (table initialised with cpu flags 0).
 *
 * cpu_ref: CPU flags for the reference table; each test is guarded by a
 *          comparison of the cpu_new pointer against the cpu_ref pointer.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the return statement is not visible in this listing; `ret`
 * is presumably the failure indicator handed back to check_all. */
265 static int check_mc( int cpu_ref, int cpu_new )
267 x264_mc_functions_t mc_c;
268 x264_mc_functions_t mc_ref;
269 x264_mc_functions_t mc_a;
/* Inputs point into buf1; the C output goes to buf3 (dst1) and the cpu_new
 * output to buf4 (dst2), both at offset 2*32+2 from the buffer start. */
271 uint8_t *src = &buf1[2*32+2];
272 uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2],
273 &buf1[12*32+2], &buf1[17*32+2] };
274 uint8_t *dst1 = &buf3[2*32+2];
275 uint8_t *dst2 = &buf4[2*32+2];
278 int ret = 0, ok, used_asm;
/* mc_c is the C table (cpu = 0); mc_ref and mc_a are built for cpu_ref and
 * cpu_new respectively. */
280 x264_mc_init( 0, &mc_c );
281 x264_mc_init( cpu_ref, &mc_ref );
282 x264_mc_init( cpu_new, &mc_a );
/* MC_TEST_LUMA( w, h ): fills buf3 and buf4 with 0xCD, runs the C and cpu_new
 * mc_luma for the current (dx, dy) and block size, then compares all 1024
 * bytes of both buffers, which also covers bytes outside the w x h block. */
284 #define MC_TEST_LUMA( w, h ) \
285 if( mc_a.mc_luma != mc_ref.mc_luma ) \
288 memset(buf3, 0xCD, 1024); \
289 memset(buf4, 0xCD, 1024); \
290 mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h ); \
291 mc_a.mc_luma( src2, 32, dst2, 16, dx, dy, w, h ); \
292 if( memcmp( buf3, buf4, 1024 ) ) \
294 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
299 #define MC_TEST_CHROMA( w, h ) \
300 if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
303 memset(buf3, 0xCD, 1024); \
304 memset(buf4, 0xCD, 1024); \
305 mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h ); \
306 mc_a.mc_chroma( src, 32, dst2, 16, dx, dy, w, h ); \
307 if( memcmp( buf3, buf4, 1024 ) ) \
309 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
313 ok = 1; used_asm = 0;
314 for( dy = 0; dy < 4; dy++ )
315 for( dx = 0; dx < 4; dx++ )
317 MC_TEST_LUMA( 16, 16 );
318 MC_TEST_LUMA( 16, 8 );
319 MC_TEST_LUMA( 8, 16 );
320 MC_TEST_LUMA( 8, 8 );
321 MC_TEST_LUMA( 8, 4 );
322 MC_TEST_LUMA( 4, 8 );
323 MC_TEST_LUMA( 4, 4 );
325 report( "mc luma :" );
/* Chroma: dx and dy each run over 0..8 for every block size listed. */
327 ok = 1; used_asm = 0;
328 for( dy = 0; dy < 9; dy++ )
329 for( dx = 0; dx < 9; dx++ )
331 MC_TEST_CHROMA( 8, 8 );
332 MC_TEST_CHROMA( 8, 4 );
333 MC_TEST_CHROMA( 4, 8 );
334 MC_TEST_CHROMA( 4, 4 );
335 MC_TEST_CHROMA( 4, 2 );
336 MC_TEST_CHROMA( 2, 4 );
337 MC_TEST_CHROMA( 2, 2 );
339 report( "mc chroma :" );
341 #undef MC_TEST_CHROMA
/* MC_TEST_AVG( name, ... ): for each of 10 entries of table `name`, starts
 * buf3 and buf4 from buf1, runs the C function on buf3 and the cpu_new
 * function on buf4 (both reading buf2, plus any extra arguments), and
 * compares the two 1024-byte results. */
343 #define MC_TEST_AVG( name, ... ) \
344 for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \
346 memcpy( buf3, buf1, 1024 ); \
347 memcpy( buf4, buf1, 1024 ); \
348 if( mc_a.name[i] != mc_ref.name[i] ) \
351 mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
352 mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
353 if( memcmp( buf3, buf4, 1024 ) ) \
356 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
361 report( "mc avg :" );
/* Weighted average: sweep the weight w from -64 to 128; the `&& ok` in the
 * loop condition stops the sweep after the first failing weight. */
362 for( w = -64; w <= 128 && ok; w++ )
363 MC_TEST_AVG( avg_weight, w );
364 report( "mc wpredb :" );
/* check_deblock: compares the deblocking filters selected for cpu_new with
 * the plain C versions (table initialised with cpu flags 0).
 *
 * cpu_ref: CPU flags for the reference table; each test is guarded by a
 *          comparison of the cpu_new pointer against the cpu_ref pointer.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the return statement is not visible in this listing; `ret`
 * is presumably the failure indicator handed back to check_all. */
369 static int check_deblock( int cpu_ref, int cpu_new )
371 x264_deblock_function_t db_c;
372 x264_deblock_function_t db_ref;
373 x264_deblock_function_t db_a;
374 int ret = 0, ok = 1, used_asm = 0;
375 int alphas[36], betas[36];
/* db_c is the C table (cpu = 0); db_ref and db_a are built for cpu_ref and
 * cpu_new respectively. */
379 x264_deblock_init( 0, &db_c );
380 x264_deblock_init( cpu_ref, &db_ref );
381 x264_deblock_init( cpu_new, &db_a );
383 /* not exactly the real values of a,b,tc but close enough */
385 for( i = 35; i >= 0; i-- )
389 tcs[i][0] = tcs[i][2] = (c+6)/10;
390 tcs[i][1] = tcs[i][3] = (c+9)/20;
/* TEST_DEBLOCK( name, ... ): for each of the 36 (alpha, beta) entries, refills
 * buf1 with random bytes (masked to 0xf for odd i, 0xff for even i), copies
 * it to buf3 and buf4, runs the C filter on buf3 and the cpu_new filter on
 * buf4 starting at offset 8*32, and compares all 1024 bytes.
 * NOTE(review): this overwrites buf1, although the comment on the buf1/buf2
 * declaration says those buffers should not be written. */
395 #define TEST_DEBLOCK( name, ... ) \
396 for( i = 0; i < 36; i++ ) \
398 for( j = 0; j < 1024; j++ ) \
399 /* two distributions of random to exercise different failure modes */\
400 buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
401 memcpy( buf3, buf1, 1024 ); \
402 memcpy( buf4, buf1, 1024 ); \
403 if( db_a.name != db_ref.name ) \
406 db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
407 db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
408 if( memcmp( buf3, buf4, 1024 ) ) \
411 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
417 TEST_DEBLOCK( deblock_h_luma, tcs[i] );
418 TEST_DEBLOCK( deblock_v_luma, tcs[i] );
419 TEST_DEBLOCK( deblock_h_chroma, tcs[i] );
420 TEST_DEBLOCK( deblock_v_chroma, tcs[i] );
421 TEST_DEBLOCK( deblock_h_luma_intra );
422 TEST_DEBLOCK( deblock_v_luma_intra );
423 TEST_DEBLOCK( deblock_h_chroma_intra );
424 TEST_DEBLOCK( deblock_v_chroma_intra );
426 report( "deblock :" );
/* check_quant: compares the quantisation and dequantisation functions
 * selected for cpu_new with the plain C versions (table initialised with cpu
 * flags 0), repeating the tests for several scaling-list configurations.
 *
 * cpu_ref: CPU flags for the reference table; each test is guarded by a
 *          comparison of the cpu_new pointer against the cpu_ref pointer.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the declarations of `h`, `cqm_buf` and the loop variables,
 * and the return statement, are not visible in this listing. */
431 static int check_quant( int cpu_ref, int cpu_new )
433 x264_quant_function_t qf_c;
434 x264_quant_function_t qf_ref;
435 x264_quant_function_t qf_a;
436 int16_t dct1[64], dct2[64];
438 int ret = 0, ok, used_asm;
439 int oks[2] = {1,1}, used_asms[2] = {0,0};
443 h->pps = h->pps_array;
444 x264_param_default( &h->param );
/* i_cqm selects the scaling lists for this pass: the flat16 list, the JVT
 * lists when i_cqm == 1, or lists that all point at cqm_buf, which is
 * filled with random values in 10..255.
 * NOTE(review): some of the branch conditions are not visible here. */
446 for( i_cqm = 0; i_cqm < 4; i_cqm++ )
449 for( i = 0; i < 6; i++ )
450 h->pps->scaling_list[i] = x264_cqm_flat16;
451 else if( i_cqm == 1 )
452 for( i = 0; i < 6; i++ )
453 h->pps->scaling_list[i] = x264_cqm_jvt[i];
457 for( i = 0; i < 64; i++ )
458 cqm_buf[i] = 10 + rand() % 246;
460 for( i = 0; i < 64; i++ )
462 for( i = 0; i < 6; i++ )
463 h->pps->scaling_list[i] = cqm_buf;
/* Build the three function tables against the same `h`: C (cpu = 0),
 * cpu_ref and cpu_new. */
467 x264_quant_init( h, 0, &qf_c );
468 x264_quant_init( h, cpu_ref, &qf_ref );
469 x264_quant_init( h, cpu_new, &qf_a );
/* INIT_QUANT8() / INIT_QUANT4(): fill dct1 and dct2 with the same random
 * coefficients; each value lies in [-scale, scale], where scale is derived
 * from the per-row and per-column entries of scale1d. */
471 #define INIT_QUANT8() \
473 static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \
475 for( y = 0; y < 8; y++ ) \
476 for( x = 0; x < 8; x++ ) \
478 unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \
479 dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \
483 #define INIT_QUANT4() \
485 static const int scale1d[4] = {4,6,4,6}; \
487 for( y = 0; y < 4; y++ ) \
488 for( x = 0; x < 4; x++ ) \
490 unsigned int scale = 255*scale1d[y]*scale1d[x]; \
491 dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \
495 #define TEST_QUANT( name, cqm ) \
496 if( qf_a.name != qf_ref.name ) \
499 for( i = 0; i < 64; i++ ) \
500 dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
501 qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \
502 qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \
503 if( memcmp( dct1, dct2, 64*2 ) ) \
506 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
510 #define TEST_QUANT8( qname, cqm, shift, divider ) \
511 if( qf_a.qname != qf_ref.qname ) \
515 for( qp = 51; qp > 0; qp-- ) \
518 qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
519 qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
520 if( memcmp( dct1, dct2, 64*2 ) ) \
523 fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
529 #define TEST_QUANT4( qname, cqm, shift, divider ) \
530 if( qf_a.qname != qf_ref.qname ) \
534 for( qp = 51; qp > 0; qp-- ) \
537 qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
538 qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
539 if( memcmp( dct1, dct2, 16*2 ) ) \
542 fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
548 TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8IY], 16, 3 );
549 TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8PY], 16, 6 );
550 TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4IY], 15, 3 );
551 TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4PY], 15, 6 );
552 TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] );
553 TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] );
/* TEST_DEQUANT8 / TEST_DEQUANT4: for qp = 51 down to 1, quantise dct1 with
 * the C quant function, copy the result to dct2, then run the C dequant on
 * dct1 and the cpu_new dequant on dct2 and compare. The guard is on the
 * dequant pointer (dqname), not on the quant function used to prepare the
 * input. */
555 #define TEST_DEQUANT8( qname, dqname, cqm, dqm, shift, divider ) \
556 if( qf_a.dqname != qf_ref.dqname ) \
560 for( qp = 51; qp > 0; qp-- ) \
563 qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
564 memcpy( dct2, dct1, 64*2 ); \
565 qf_c.dqname( (void*)dct1, dqm, qp ); \
566 qf_a.dqname( (void*)dct2, dqm, qp ); \
567 if( memcmp( dct1, dct2, 64*2 ) ) \
570 fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
576 #define TEST_DEQUANT4( qname, dqname, cqm, dqm, shift, divider ) \
577 if( qf_a.dqname != qf_ref.dqname ) \
581 for( qp = 51; qp > 0; qp-- ) \
584 qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
585 memcpy( dct2, dct1, 16*2 ); \
586 qf_c.dqname( (void*)dct1, dqm, qp ); \
587 qf_a.dqname( (void*)dct2, dqm, qp ); \
588 if( memcmp( dct1, dct2, 16*2 ) ) \
591 fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
597 TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8IY], h->dequant8_mf[CQM_8IY], 16, 3 );
598 TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8PY], h->dequant8_mf[CQM_8PY], 16, 6 );
599 TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4IY], h->dequant4_mf[CQM_4IY], 15, 3 );
600 TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4PY], h->dequant4_mf[CQM_4PY], 15, 6 );
/* Load the saved flags into ok / used_asm before reporting: slot 1 feeds the
 * "dequant" line below; slot 0 presumably feeds a quant report whose line
 * is not visible in this listing. */
603 ok = oks[0]; used_asm = used_asms[0];
606 ok = oks[1]; used_asm = used_asms[1];
607 report( "dequant :" );
/* check_intra: compares the intra prediction functions selected for cpu_new
 * with the plain C versions (tables initialised with cpu flags 0).
 *
 * cpu_ref: CPU flags for the reference tables; each test is guarded by a
 *          comparison of the cpu_new pointer against the cpu_ref pointer.
 * cpu_new: CPU flags of the implementation under test.
 *
 * NOTE(review): the opening of the struct that groups the predictor tables,
 * and the return statement, are not visible in this listing. */
612 static int check_intra( int cpu_ref, int cpu_new )
614 int ret = 0, ok = 1, used_asm = 0;
616 DECLARE_ALIGNED( uint8_t, edge[33], 8 );
619 x264_predict_t predict_16x16[4+3];
620 x264_predict_t predict_8x8c[4+3];
621 x264_predict8x8_t predict_8x8[9+3];
622 x264_predict_t predict_4x4[9+3];
623 } ip_c, ip_ref, ip_a;
/* ip_c: C predictors (cpu = 0). */
625 x264_predict_16x16_init( 0, ip_c.predict_16x16 );
626 x264_predict_8x8c_init( 0, ip_c.predict_8x8c );
627 x264_predict_8x8_init( 0, ip_c.predict_8x8 );
628 x264_predict_4x4_init( 0, ip_c.predict_4x4 );
/* ip_ref: predictors for cpu_ref. */
630 x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 );
631 x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c );
632 x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 );
633 x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 );
/* ip_a: predictors for cpu_new, the implementation under test. */
635 x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 );
636 x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c );
637 x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 );
638 x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );
/* Fill `edge` from buf1+48, the same offset at which the predictors write
 * into buf3 / buf4; it is passed as the extra argument to predict_8x8. */
640 x264_predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
/* INTRA_TEST( name, dir, ... ): starts buf3 and buf4 from the first 32*20
 * bytes of buf1, runs the C predictor on buf3+48 and the cpu_new predictor
 * on buf4+48, and compares those 32*20 bytes. The failure path prints
 * entries of edge[] and bytes of buf4 and buf3 in hex with printf. */
642 #define INTRA_TEST( name, dir, ... ) \
643 if( ip_a.name[dir] != ip_ref.name[dir] )\
646 memcpy( buf3, buf1, 32*20 );\
647 memcpy( buf4, buf1, 32*20 );\
648 ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
649 ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
650 if( memcmp( buf3, buf4, 32*20 ) )\
652 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
655 for(k=-1; k<16; k++)\
656 printf("%2x ", edge[16+k]);\
659 printf("%2x ", edge[j]);\
661 printf("%2x ", buf4[48+k+j*32]);\
668 printf("%2x ", buf3[48+k+j*32]);\
674 for( i = 0; i < 12; i++ )
675 INTRA_TEST( predict_4x4, i );
676 for( i = 0; i < 7; i++ )
677 INTRA_TEST( predict_8x8c, i );
678 for( i = 0; i < 7; i++ )
679 INTRA_TEST( predict_16x16, i );
680 for( i = 0; i < 12; i++ )
681 INTRA_TEST( predict_8x8, i, edge );
683 report( "intra pred :" );
/* check_all: runs every check group (pixel, dct, mc, intra, deblock, quant)
 * with the same cpu_ref / cpu_new flags and returns the sum of their results.
 *
 * NOTE(review): the calls are operands of `+`, whose evaluation order C
 * leaves unspecified, so the order of the printed reports (and of rand()
 * consumption and of check_deblock's writes to buf1) is compiler-dependent. */
687 int check_all( int cpu_ref, int cpu_new )
689 return check_pixel( cpu_ref, cpu_new )
690 + check_dct( cpu_ref, cpu_new )
691 + check_mc( cpu_ref, cpu_new )
692 + check_intra( cpu_ref, cpu_new )
693 + check_deblock( cpu_ref, cpu_new )
694 + check_quant( cpu_ref, cpu_new );
/* main: allocates the shared test buffers, picks a random seed, fills the
 * buffers, runs check_all for the available instruction sets and prints an
 * overall pass / fail message.
 *
 * argv[1], when present, is parsed with atoi and used as the seed value;
 * otherwise x264_mdate() supplies it.
 *
 * NOTE(review): the local declarations, the call that applies the seed, the
 * architecture conditionals around the MMXEXT/SSE2 and ALTIVEC sections, and
 * the return statement are not visible in this listing. */
697 int main(int argc, char *argv[])
/* Five 1024-byte buffers shared by all checks. */
702 buf1 = x264_malloc( 1024 ); /* 32 x 32 */
703 buf2 = x264_malloc( 1024 );
704 buf3 = x264_malloc( 1024 );
705 buf4 = x264_malloc( 1024 );
706 buf5 = x264_malloc( 1024 );
/* The seed is printed so that a run can be repeated by passing it back as
 * argv[1]. */
708 i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate();
709 fprintf( stderr, "x264: using random seed %u\n", i );
/* buf1 / buf2 get random bytes; buf3 / buf4 start zeroed. */
712 for( i = 0; i < 1024; i++ )
714 buf1[i] = rand() & 0xFF;
715 buf2[i] = rand() & 0xFF;
716 buf3[i] = buf4[i] = 0;
/* First pass: MMX | MMXEXT as cpu_new with cpu_ref = 0. */
720 fprintf( stderr, "x264: MMXEXT against C\n" );
721 ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );
/* Second pass, only when SSE2 is detected: MMX | MMXEXT is now cpu_ref, so
 * the guards in each check compare the SSE2 table against the MMXEXT table. */
723 if( x264_cpu_detect() & X264_CPU_SSE2 )
725 fprintf( stderr, "\nx264: SSE2 against C\n" );
726 ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT,
727 X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 );
731 fprintf( stderr, "x264: ALTIVEC against C\n" );
732 ret = check_all( 0, X264_CPU_ALTIVEC );
737 fprintf( stderr, "x264: All tests passed Yeah :)\n" );
740 fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" );