-#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
+#include <math.h>
#include "common/common.h"
#include "common/cpu.h"
-#ifdef HAVE_MMXEXT
+#ifdef HAVE_MMX
#include "common/i386/pixel.h"
#include "common/i386/dct.h"
#include "common/i386/mc.h"
if( pixel_asm.name[i] != pixel_ref.name[i] ) \
{ \
used_asm = 1; \
- res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
- res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
+ res_c = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
+ res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
if( res_c != res_asm ) \
{ \
ok = 0; \
if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
{ \
used_asm = 1; \
- res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
- res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
- res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
+ res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
+ res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
+ res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
if(N==4) \
{ \
- res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
- pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
+ res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
+ pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
} \
else \
- pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
+ pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
{ \
ok = 0; \
x264_cpu_restore( cpu_new );
res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
- if( res_c != res_a )
+ if( fabs(res_c - res_a) > 1e-8 )
{
ok = 0;
fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
x264_dct_function_t dct_ref;
x264_dct_function_t dct_asm;
int ret = 0, ok, used_asm;
- int16_t dct1[16][4][4] __attribute((aligned(16)));
- int16_t dct2[16][4][4] __attribute((aligned(16)));
+ int16_t dct1[16][4][4] __attribute__((aligned(16)));
+ int16_t dct2[16][4][4] __attribute__((aligned(16)));
x264_dct_init( 0, &dct_c );
x264_dct_init( cpu_ref, &dct_ref);
if( mc_a.name[i] != mc_ref.name[i] ) \
{ \
used_asm = 1; \
- mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
- mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
+ mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
+ mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
ok = 0; \
x264_quant_function_t qf_c;
x264_quant_function_t qf_ref;
x264_quant_function_t qf_a;
- int16_t dct1[64], dct2[64];
- uint8_t cqm_buf[64];
+ int16_t dct1[64] __attribute__((__aligned__(16)));
+ int16_t dct2[64] __attribute__((__aligned__(16)));
+ uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
int ret = 0, ok, used_asm;
int oks[2] = {1,1}, used_asms[2] = {0,0};
- int i, i_cqm;
+ int i, i_cqm, qp;
x264_t h_buf;
x264_t *h = &h_buf;
h->pps = h->pps_array;
x264_param_default( &h->param );
+ h->param.rc.i_qp_min = 26;
for( i_cqm = 0; i_cqm < 4; i_cqm++ )
{
} \
}
-#define TEST_QUANT( name, cqm ) \
+#define TEST_QUANT_DC( name, cqm ) \
if( qf_a.name != qf_ref.name ) \
{ \
- used_asms[0] = 1; \
- for( i = 0; i < 64; i++ ) \
- dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
- qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \
- qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \
- if( memcmp( dct1, dct2, 64*2 ) ) \
- { \
- oks[0] = 0; \
- fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
- } \
- }
-
-#define TEST_QUANT8( qname, cqm, shift, divider ) \
- if( qf_a.qname != qf_ref.qname ) \
- { \
- int qp; \
used_asms[0] = 1; \
for( qp = 51; qp > 0; qp-- ) \
{ \
- INIT_QUANT8() \
- qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- if( memcmp( dct1, dct2, 64*2 ) ) \
+ for( i = 0; i < 16; i++ ) \
+ dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
+ qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+ qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+ if( memcmp( dct1, dct2, 16*2 ) ) \
{ \
oks[0] = 0; \
- fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+ fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
break; \
} \
} \
}
-#define TEST_QUANT4( qname, cqm, shift, divider ) \
+#define TEST_QUANT( qname, block, w ) \
if( qf_a.qname != qf_ref.qname ) \
{ \
- int qp; \
used_asms[0] = 1; \
for( qp = 51; qp > 0; qp-- ) \
{ \
- INIT_QUANT4() \
- qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- if( memcmp( dct1, dct2, 16*2 ) ) \
+ INIT_QUANT##w() \
+ qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ if( memcmp( dct1, dct2, w*w*2 ) ) \
{ \
oks[0] = 0; \
- fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+ fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
break; \
} \
} \
}
- TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8IY], 16, 3 );
- TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8PY], 16, 6 );
- TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4IY], 15, 3 );
- TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4PY], 15, 6 );
- TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] );
- TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] );
-
-#define TEST_DEQUANT8( qname, dqname, cqm, dqm, shift, divider ) \
- if( qf_a.dqname != qf_ref.dqname ) \
- { \
- int qp; \
- used_asms[1] = 1; \
- for( qp = 51; qp > 0; qp-- ) \
- { \
- INIT_QUANT8() \
- qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- memcpy( dct2, dct1, 64*2 ); \
- qf_c.dqname( (void*)dct1, dqm, qp ); \
- qf_a.dqname( (void*)dct2, dqm, qp ); \
- if( memcmp( dct1, dct2, 64*2 ) ) \
- { \
- oks[1] = 0; \
- fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
- break; \
- } \
- } \
- }
+ TEST_QUANT( quant_8x8, CQM_8IY, 8 );
+ TEST_QUANT( quant_8x8, CQM_8PY, 8 );
+ TEST_QUANT( quant_4x4, CQM_4IY, 4 );
+ TEST_QUANT( quant_4x4, CQM_4PY, 4 );
+ TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] );
+ TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );
-#define TEST_DEQUANT4( qname, dqname, cqm, dqm, shift, divider ) \
+#define TEST_DEQUANT( qname, dqname, block, w ) \
if( qf_a.dqname != qf_ref.dqname ) \
{ \
- int qp; \
used_asms[1] = 1; \
for( qp = 51; qp > 0; qp-- ) \
{ \
- INIT_QUANT4() \
- qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
- memcpy( dct2, dct1, 16*2 ); \
- qf_c.dqname( (void*)dct1, dqm, qp ); \
- qf_a.dqname( (void*)dct2, dqm, qp ); \
- if( memcmp( dct1, dct2, 16*2 ) ) \
+ INIT_QUANT##w() \
+ qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ memcpy( dct2, dct1, w*w*2 ); \
+ qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
+ qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
+ if( memcmp( dct1, dct2, w*w*2 ) ) \
{ \
oks[1] = 0; \
- fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+ fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
break; \
} \
} \
}
- TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8IY], h->dequant8_mf[CQM_8IY], 16, 3 );
- TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8PY], h->dequant8_mf[CQM_8PY], 16, 6 );
- TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4IY], h->dequant4_mf[CQM_4IY], 15, 3 );
- TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4PY], h->dequant4_mf[CQM_4PY], 15, 6 );
+ TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 );
+ TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 );
+ TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
+ TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
}
ok = oks[0]; used_asm = used_asms[0];
int main(int argc, char *argv[])
{
int ret = 0;
+ int cpu0 = 0, cpu1 = 0;
int i;
buf1 = x264_malloc( 1024 ); /* 32 x 32 */
buf3[i] = buf4[i] = 0;
}
-#ifdef HAVE_MMXEXT
+#ifdef HAVE_MMX
fprintf( stderr, "x264: MMXEXT against C\n" );
- ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );
-#ifdef HAVE_SSE2
+ cpu1 = X264_CPU_MMX | X264_CPU_MMXEXT;
+ ret = check_all( 0, cpu1 );
+
if( x264_cpu_detect() & X264_CPU_SSE2 )
{
fprintf( stderr, "\nx264: SSE2 against C\n" );
- ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT,
- X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 );
+ cpu0 = cpu1;
+ cpu1 |= X264_CPU_SSE | X264_CPU_SSE2;
+ ret |= check_all( cpu0, cpu1 );
+
+ if( x264_cpu_detect() & X264_CPU_SSSE3 )
+ {
+ fprintf( stderr, "\nx264: SSSE3 against C\n" );
+ cpu0 = cpu1;
+ cpu1 |= X264_CPU_SSE3 | X264_CPU_SSSE3;
+ ret |= check_all( cpu0, cpu1 );
+ }
}
-#endif
#elif ARCH_PPC
- fprintf( stderr, "x264: ALTIVEC against C\n" );
- ret = check_all( 0, X264_CPU_ALTIVEC );
+ if( x264_cpu_detect() & X264_CPU_ALTIVEC )
+ {
+ fprintf( stderr, "x264: ALTIVEC against C\n" );
+ ret = check_all( 0, X264_CPU_ALTIVEC );
+ }
#endif
if( ret == 0 )