]> git.sesse.net Git - x264/blobdiff - tools/checkasm.c
move os/compiler specific defines to their own header
[x264] / tools / checkasm.c
index 888cd14c4bbbffb82d878c43bbc1db2f17aa1e09..07c9210f39077ec2e9869a1303d09ac8d3826d3c 100644 (file)
@@ -1,10 +1,9 @@
-#include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
+#include <math.h>
 
 #include "common/common.h"
 #include "common/cpu.h"
-#ifdef HAVE_MMXEXT
+#ifdef HAVE_MMX
 #include "common/i386/pixel.h"
 #include "common/i386/dct.h"
 #include "common/i386/mc.h"
@@ -56,8 +55,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
         if( pixel_asm.name[i] != pixel_ref.name[i] ) \
         { \
             used_asm = 1; \
-            res_c   = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
-            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
+            res_c   = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
+            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
             if( res_c != res_asm ) \
             { \
                 ok = 0; \
@@ -79,16 +78,16 @@ static int check_pixel( int cpu_ref, int cpu_new )
         if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
         { \
             used_asm = 1; \
-            res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
-            res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
-            res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
+            res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
+            res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
+            res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
             if(N==4) \
             { \
-                res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
-                pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
+                res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
+                pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
             } \
             else \
-                pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
+                pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
             if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
             { \
                 ok = 0; \
@@ -139,7 +138,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
         x264_cpu_restore( cpu_new );
         res_c = x264_pixel_ssim_wxh( &pixel_c,   buf1+2, 32, buf2+2, 32, 32, 28 );
         res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
-        if( res_c != res_a )
+        if( fabs(res_c - res_a) > 1e-8 )
         {
             ok = 0;
             fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
@@ -175,8 +174,8 @@ static int check_dct( int cpu_ref, int cpu_new )
     x264_dct_function_t dct_ref;
     x264_dct_function_t dct_asm;
     int ret = 0, ok, used_asm;
-    int16_t dct1[16][4][4] __attribute((aligned(16)));
-    int16_t dct2[16][4][4] __attribute((aligned(16)));
+    int16_t dct1[16][4][4] __attribute__((aligned(16)));
+    int16_t dct2[16][4][4] __attribute__((aligned(16)));
 
     x264_dct_init( 0, &dct_c );
     x264_dct_init( cpu_ref, &dct_ref);
@@ -387,8 +386,8 @@ static int check_mc( int cpu_ref, int cpu_new )
         if( mc_a.name[i] != mc_ref.name[i] ) \
         { \
             used_asm = 1; \
-            mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
-            mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
+            mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
+            mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
             if( memcmp( buf3, buf4, 1024 ) )               \
             { \
                 ok = 0; \
@@ -472,15 +471,17 @@ static int check_quant( int cpu_ref, int cpu_new )
     x264_quant_function_t qf_c;
     x264_quant_function_t qf_ref;
     x264_quant_function_t qf_a;
-    int16_t dct1[64], dct2[64];
-    uint8_t cqm_buf[64];
+    int16_t dct1[64]    __attribute__((__aligned__(16)));
+    int16_t dct2[64]    __attribute__((__aligned__(16)));
+    uint8_t cqm_buf[64] __attribute__((__aligned__(16)));
     int ret = 0, ok, used_asm;
     int oks[2] = {1,1}, used_asms[2] = {0,0};
-    int i, i_cqm;
+    int i, i_cqm, qp;
     x264_t h_buf;
     x264_t *h = &h_buf;
     h->pps = h->pps_array;
     x264_param_default( &h->param );
+    h->param.rc.i_qp_min = 26;
 
     for( i_cqm = 0; i_cqm < 4; i_cqm++ )
     {
@@ -531,112 +532,74 @@ static int check_quant( int cpu_ref, int cpu_new )
                 } \
         }
 
-#define TEST_QUANT( name, cqm ) \
+#define TEST_QUANT_DC( name, cqm ) \
         if( qf_a.name != qf_ref.name ) \
         { \
-            used_asms[0] = 1; \
-            for( i = 0; i < 64; i++ ) \
-                dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
-            qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \
-            qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \
-            if( memcmp( dct1, dct2, 64*2 ) )       \
-            { \
-                oks[0] = 0; \
-                fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
-            } \
-        }
-
-#define TEST_QUANT8( qname, cqm, shift, divider ) \
-        if( qf_a.qname != qf_ref.qname ) \
-        { \
-            int qp; \
             used_asms[0] = 1; \
             for( qp = 51; qp > 0; qp-- ) \
             { \
-                INIT_QUANT8() \
-                qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                if( memcmp( dct1, dct2, 64*2 ) ) \
+                for( i = 0; i < 16; i++ ) \
+                    dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \
+                qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+                qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
+                if( memcmp( dct1, dct2, 16*2 ) )       \
                 { \
                     oks[0] = 0; \
-                    fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+                    fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
                     break; \
                 } \
             } \
         }
 
-#define TEST_QUANT4( qname, cqm, shift, divider ) \
+#define TEST_QUANT( qname, block, w ) \
         if( qf_a.qname != qf_ref.qname ) \
         { \
-            int qp; \
             used_asms[0] = 1; \
             for( qp = 51; qp > 0; qp-- ) \
             { \
-                INIT_QUANT4() \
-                qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                if( memcmp( dct1, dct2, 16*2 ) ) \
+                INIT_QUANT##w() \
+                qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+                qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+                if( memcmp( dct1, dct2, w*w*2 ) ) \
                 { \
                     oks[0] = 0; \
-                    fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+                    fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
                     break; \
                 } \
             } \
         }
 
-        TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8IY], 16, 3 );
-        TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8PY], 16, 6 );
-        TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4IY], 15, 3 );
-        TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4PY], 15, 6 );
-        TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] );
-        TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] );
-
-#define TEST_DEQUANT8( qname, dqname, cqm, dqm, shift, divider ) \
-        if( qf_a.dqname != qf_ref.dqname ) \
-        { \
-            int qp; \
-            used_asms[1] = 1; \
-            for( qp = 51; qp > 0; qp-- ) \
-            { \
-                INIT_QUANT8() \
-                qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                memcpy( dct2, dct1, 64*2 ); \
-                qf_c.dqname( (void*)dct1, dqm, qp ); \
-                qf_a.dqname( (void*)dct2, dqm, qp ); \
-                if( memcmp( dct1, dct2, 64*2 ) ) \
-                { \
-                    oks[1] = 0; \
-                    fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
-                    break; \
-                } \
-            } \
-        }
+        TEST_QUANT( quant_8x8, CQM_8IY, 8 );
+        TEST_QUANT( quant_8x8, CQM_8PY, 8 );
+        TEST_QUANT( quant_4x4, CQM_4IY, 4 );
+        TEST_QUANT( quant_4x4, CQM_4PY, 4 );
+        TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] );
+        TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );
 
-#define TEST_DEQUANT4( qname, dqname, cqm, dqm, shift, divider ) \
+#define TEST_DEQUANT( qname, dqname, block, w ) \
         if( qf_a.dqname != qf_ref.dqname ) \
         { \
-            int qp; \
             used_asms[1] = 1; \
             for( qp = 51; qp > 0; qp-- ) \
             { \
-                INIT_QUANT4() \
-                qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \
-                memcpy( dct2, dct1, 16*2 ); \
-                qf_c.dqname( (void*)dct1, dqm, qp ); \
-                qf_a.dqname( (void*)dct2, dqm, qp ); \
-                if( memcmp( dct1, dct2, 16*2 ) ) \
+                INIT_QUANT##w() \
+                qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+                memcpy( dct2, dct1, w*w*2 ); \
+                qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \
+                qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \
+                if( memcmp( dct1, dct2, w*w*2 ) ) \
                 { \
                     oks[1] = 0; \
-                    fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \
+                    fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
                     break; \
                 } \
             } \
         }
 
-        TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8IY], h->dequant8_mf[CQM_8IY], 16, 3 );
-        TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8PY], h->dequant8_mf[CQM_8PY], 16, 6 );
-        TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4IY], h->dequant4_mf[CQM_4IY], 15, 3 );
-        TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4PY], h->dequant4_mf[CQM_4PY], 15, 6 );
+        TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 );
+        TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 );
+        TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
+        TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
     }
 
     ok = oks[0]; used_asm = used_asms[0];
@@ -736,6 +699,7 @@ int check_all( int cpu_ref, int cpu_new )
 int main(int argc, char *argv[])
 {
     int ret = 0;
+    int cpu0 = 0, cpu1 = 0;
     int i;
 
     buf1 = x264_malloc( 1024 ); /* 32 x 32 */
@@ -755,20 +719,32 @@ int main(int argc, char *argv[])
         buf3[i] = buf4[i] = 0;
     }
 
-#ifdef HAVE_MMXEXT
+#ifdef HAVE_MMX
     fprintf( stderr, "x264: MMXEXT against C\n" );
-    ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );
-#ifdef HAVE_SSE2
+    cpu1 = X264_CPU_MMX | X264_CPU_MMXEXT;
+    ret = check_all( 0, cpu1 );
+
     if( x264_cpu_detect() & X264_CPU_SSE2 )
     {
         fprintf( stderr, "\nx264: SSE2 against C\n" );
-        ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT,
-                          X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 );
+        cpu0 = cpu1;
+        cpu1 |= X264_CPU_SSE | X264_CPU_SSE2;
+        ret |= check_all( cpu0, cpu1 );
+
+        if( x264_cpu_detect() & X264_CPU_SSSE3 )
+        {
+            fprintf( stderr, "\nx264: SSSE3 against C\n" );
+            cpu0 = cpu1;
+            cpu1 |= X264_CPU_SSE3 | X264_CPU_SSSE3;
+            ret |= check_all( cpu0, cpu1 );
+        }
     }
-#endif
 #elif ARCH_PPC
-    fprintf( stderr, "x264: ALTIVEC against C\n" );
-    ret = check_all( 0, X264_CPU_ALTIVEC );
+    if( x264_cpu_detect() & X264_CPU_ALTIVEC )
+    {
+        fprintf( stderr, "x264: ALTIVEC against C\n" );
+        ret = check_all( 0, X264_CPU_ALTIVEC );
+    }
 #endif
 
     if( ret == 0 )