checkasm: silence gcc warnings, fix the naming of some functions in results

author Anton Mitrofanov <Bugmaster@narod.ru>

Tue, 29 Nov 2011 21:45:13 +0000 (13:45 -0800)

committer Fiona Glaser <fiona@x264.com>

Thu, 1 Dec 2011 20:27:33 +0000 (12:27 -0800)
author Anton Mitrofanov <Bugmaster@narod.ru>
Tue, 29 Nov 2011 21:45:13 +0000 (13:45 -0800)
committer Fiona Glaser <fiona@x264.com>
Thu, 1 Dec 2011 20:27:33 +0000 (12:27 -0800)
diff --git a/tools/checkasm.c b/tools/checkasm.c

index c83b5991f39cdbf944bf594f87566cd9e1e55ed3..1f491cfa19eca8d8f6c268c5931cdbf2e1ec91b7 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -55,7 +55,7 @@ int quiet = 0;
  #define BENCH_RUNS 100  // tradeoff between accuracy and speed
  #define BENCH_ALIGNS 16 // number of stack+heap data alignments (another accuracy vs speed tradeoff)
  #define MAX_FUNCS 1000  // just has to be big enough to hold all the existing functions
-#define MAX_CPUS 10     // number of different combinations of cpu flags
+#define MAX_CPUS 30     // number of different combinations of cpu flags
  
  typedef struct
  {
@@ -168,11 +168,10 @@ static void print_bench(void)
                      b->cpu&X264_CPU_XOP ? "xop" :
                      b->cpu&X264_CPU_AVX ? "avx" :
                      b->cpu&X264_CPU_SSE4 ? "sse4" :
-                    b->cpu&X264_CPU_SHUFFLE_IS_FAST ? "fastshuffle" :
                      b->cpu&X264_CPU_SSSE3 ? "ssse3" :
                      b->cpu&X264_CPU_SSE3 ? "sse3" :
                      /* print sse2slow only if there's also a sse2fast version of the same func */
-                    b->cpu&X264_CPU_SSE2_IS_SLOW && j<MAX_CPUS && b[1].cpu&X264_CPU_SSE2_IS_FAST && !(b[1].cpu&X264_CPU_SSE3) ? "sse2slow" :
+                    b->cpu&X264_CPU_SSE2_IS_SLOW && j<MAX_CPUS-1 && b[1].cpu&X264_CPU_SSE2_IS_FAST && !(b[1].cpu&X264_CPU_SSE3) ? "sse2slow" :
                      b->cpu&X264_CPU_SSE2 ? "sse2" :
                      b->cpu&X264_CPU_MMX ? "mmx" :
                      b->cpu&X264_CPU_ALTIVEC ? "altivec" :
@@ -180,6 +179,7 @@ static void print_bench(void)
                      b->cpu&X264_CPU_ARMV6 ? "armv6" : "c",
                      b->cpu&X264_CPU_CACHELINE_32 ? "_c32" :
                      b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
+                    b->cpu&X264_CPU_SHUFFLE_IS_FAST && !(b->cpu&X264_CPU_SSE4) ? "_fastshuffle" :
                      b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
                      b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
                      b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
@@ -651,7 +651,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
          {
              ALIGNED_16( uint16_t sums[72] );
              ALIGNED_16( int dc[4] );
-            int16_t mvs_a[32], mvs_c[32];
+            ALIGNED_16( int16_t mvs_a[32] );
+            ALIGNED_16( int16_t mvs_c[32] );
              int mvn_a, mvn_c;
              int thresh = rand() & 0x3fff;
              set_func_name( "esa_ads" );
@@ -2295,6 +2296,9 @@ static int add_flags( int *cpu_ref, int *cpu_new, int flags, const char *name )
  {
      *cpu_ref = *cpu_new;
      *cpu_new |= flags;
+#if BROKEN_STACK_ALIGNMENT
+    *cpu_new |= X264_CPU_STACK_MOD4;
+#endif
      if( *cpu_new & X264_CPU_SSE2_IS_FAST )
          *cpu_new &= ~X264_CPU_SSE2_IS_SLOW;
      if( !quiet )
@@ -2329,6 +2333,7 @@ static int check_all_flags( void )
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE | X264_CPU_SSE2 | X264_CPU_SSE2_IS_SLOW, "SSE2Slow" );
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE2_IS_FAST, "SSE2Fast" );
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSE2Fast Cache64" );
+        cpu1 &= ~X264_CPU_CACHELINE_64;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SHUFFLE_IS_FAST, "SSE2 FastShuffle" );
          cpu1 &= ~X264_CPU_SHUFFLE_IS_FAST;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
@@ -2338,23 +2343,24 @@ static int check_all_flags( void )
      }
      if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN )
      {
-        cpu1 &= ~X264_CPU_CACHELINE_64;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" );
          cpu1 &= ~X264_CPU_SSE_MISALIGN;
      }
      if( x264_cpu_detect() & X264_CPU_LZCNT )
      {
-        cpu1 &= ~X264_CPU_CACHELINE_64;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
          cpu1 &= ~X264_CPU_LZCNT;
      }
      if( x264_cpu_detect() & X264_CPU_SSE3 )
+    {
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE3 | X264_CPU_CACHELINE_64, "SSE3" );
+        cpu1 &= ~X264_CPU_CACHELINE_64;
+    }
      if( x264_cpu_detect() & X264_CPU_SSSE3 )
      {
-        cpu1 &= ~X264_CPU_CACHELINE_64;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSSE3, "SSSE3" );
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSSE3 Cache64" );
+        cpu1 &= ~X264_CPU_CACHELINE_64;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SHUFFLE_IS_FAST, "SSSE3 FastShuffle" );
          cpu1 &= ~X264_CPU_SHUFFLE_IS_FAST;
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSSE3 SlowCTZ" );
@@ -2363,10 +2369,7 @@ static int check_all_flags( void )
          cpu1 &= ~X264_CPU_SLOW_ATOM;
      }
      if( x264_cpu_detect() & X264_CPU_SSE4 )
-    {
-        cpu1 &= ~X264_CPU_CACHELINE_64;
-        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4, "SSE4" );
-    }
+        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4 | X264_CPU_SHUFFLE_IS_FAST, "SSE4" );
      if( x264_cpu_detect() & X264_CPU_AVX )
          ret |= add_flags( &cpu0, &cpu1, X264_CPU_AVX, "AVX" );
      if( x264_cpu_detect() & X264_CPU_XOP )
author	Anton Mitrofanov <Bugmaster@narod.ru>
	Tue, 29 Nov 2011 21:45:13 +0000 (13:45 -0800)
committer	Fiona Glaser <fiona@x264.com>
	Thu, 1 Dec 2011 20:27:33 +0000 (12:27 -0800)