#undef MMX2
{"Cache32", X264_CPU_CACHELINE_32},
{"Cache64", X264_CPU_CACHELINE_64},
- {"SSEMisalign", X264_CPU_SSE_MISALIGN},
{"LZCNT", X264_CPU_LZCNT},
{"BMI1", X264_CPU_BMI1},
{"BMI2", X264_CPU_BMI1|X264_CPU_BMI2},
uint32_t cpu = 0;
uint32_t eax, ebx, ecx, edx;
uint32_t vendor[4] = {0};
- uint32_t max_extended_cap;
+ uint32_t max_extended_cap, max_basic_cap;
int cache;
#if !ARCH_X86_64
#endif
x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
- if( eax == 0 )
+ max_basic_cap = eax;
+ if( max_basic_cap == 0 )
return 0;
x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
}
}
- x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
- /* AVX2 requires OS support, but BMI1/2 don't. */
- if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
- cpu |= X264_CPU_AVX2;
- if( ebx&0x00000008 )
+ if( max_basic_cap >= 7 )
{
- cpu |= X264_CPU_BMI1;
- if( ebx&0x00000100 )
- cpu |= X264_CPU_BMI2;
+ x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+ /* AVX2 requires OS support, but BMI1/2 don't. */
+ if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+ cpu |= X264_CPU_AVX2;
+ if( ebx&0x00000008 )
+ {
+ cpu |= X264_CPU_BMI1;
+ if( ebx&0x00000100 )
+ cpu |= X264_CPU_BMI2;
+ }
}
if( cpu & X264_CPU_SSSE3 )
cpu |= X264_CPU_SSE2_IS_SLOW; /* Bobcat has 64-bit SIMD units */
cpu |= X264_CPU_SLOW_PALIGNR; /* palignr is insanely slow on Bobcat */
}
- }
-
- if( ecx&0x00000080 ) /* Misalign SSE */
- {
- cpu |= X264_CPU_SSE_MISALIGN;
- x264_cpu_mask_misalign_sse();
+ if( family == 0x16 )
+ {
+ cpu |= X264_CPU_SLOW_PSHUFB; /* Jaguar's pshufb isn't that slow, but it's slow enough
+ * compared to alternate instruction sequences that this
+ * is equal or faster on almost all such functions. */
+ }
}
if( cpu & X264_CPU_AVX )
x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
cache = ecx&0xff; // cacheline size
}
- if( !cache )
+ if( !cache && max_basic_cap >= 2 )
{
// Cache and TLB Information
static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };