]> git.sesse.net Git - x264/blobdiff - common/cpu.c
Make x264_encoder_reconfig more threadsafe
[x264] / common / cpu.c
index bb707db3f7c35ccfee93b8c7fa381d090fed71d8..0e36a89fdeb30f59246826beb4ab71245efad5c3 100644 (file)
@@ -74,7 +74,6 @@ const x264_cpu_name_t x264_cpu_names[] =
 #undef MMX2
     {"Cache32",         X264_CPU_CACHELINE_32},
     {"Cache64",         X264_CPU_CACHELINE_64},
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
     {"LZCNT",           X264_CPU_LZCNT},
     {"BMI1",            X264_CPU_BMI1},
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
@@ -123,7 +122,7 @@ uint32_t x264_cpu_detect( void )
     uint32_t cpu = 0;
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = {0};
-    uint32_t max_extended_cap;
+    uint32_t max_extended_cap, max_basic_cap;
     int cache;
 
 #if !ARCH_X86_64
@@ -132,7 +131,8 @@ uint32_t x264_cpu_detect( void )
 #endif
 
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
-    if( eax == 0 )
+    max_basic_cap = eax;
+    if( max_basic_cap == 0 )
         return 0;
 
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
@@ -169,15 +169,18 @@ uint32_t x264_cpu_detect( void )
         }
     }
 
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
-    /* AVX2 requires OS support, but BMI1/2 don't. */
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
-        cpu |= X264_CPU_AVX2;
-    if( ebx&0x00000008 )
+    if( max_basic_cap >= 7 )
     {
-        cpu |= X264_CPU_BMI1;
-        if( ebx&0x00000100 )
-            cpu |= X264_CPU_BMI2;
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+            cpu |= X264_CPU_AVX2;
+        if( ebx&0x00000008 )
+        {
+            cpu |= X264_CPU_BMI1;
+            if( ebx&0x00000100 )
+                cpu |= X264_CPU_BMI2;
+        }
     }
 
     if( cpu & X264_CPU_SSSE3 )
@@ -202,12 +205,12 @@ uint32_t x264_cpu_detect( void )
                 cpu |= X264_CPU_SSE2_IS_SLOW;  /* Bobcat has 64-bit SIMD units */
                 cpu |= X264_CPU_SLOW_PALIGNR;  /* palignr is insanely slow on Bobcat */
             }
-        }
-
-        if( ecx&0x00000080 ) /* Misalign SSE */
-        {
-            cpu |= X264_CPU_SSE_MISALIGN;
-            x264_cpu_mask_misalign_sse();
+            if( family == 0x16 )
+            {
+                cpu |= X264_CPU_SLOW_PSHUFB;   /* Jaguar's pshufb isn't that slow, but it's slow enough
+                                                * compared to alternate instruction sequences that this
+                                                * is equal or faster on almost all such functions. */
+            }
         }
 
         if( cpu & X264_CPU_AVX )
@@ -268,7 +271,7 @@ uint32_t x264_cpu_detect( void )
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
             cache = ecx&0xff; // cacheline size
         }
-        if( !cache )
+        if( !cache && max_basic_cap >= 2 )
         {
             // Cache and TLB Information
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };