/*****************************************************************************
* cpu.c: cpu detection
*****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2016 x264 project
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
* Laurent Aimar <fenrir@via.ecp.fr>
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#define _GNU_SOURCE // for sched_getaffinity
#include "common.h"
#include "cpu.h"
{"AVX", AVX},
{"XOP", AVX|X264_CPU_XOP},
{"FMA4", AVX|X264_CPU_FMA4},
- {"AVX2", AVX|X264_CPU_AVX2},
{"FMA3", AVX|X264_CPU_FMA3},
+ {"AVX2", AVX|X264_CPU_FMA3|X264_CPU_AVX2},
#undef AVX
#undef SSE2
#undef MMX2
{"Cache32", X264_CPU_CACHELINE_32},
{"Cache64", X264_CPU_CACHELINE_64},
- {"SSEMisalign", X264_CPU_SSE_MISALIGN},
{"LZCNT", X264_CPU_LZCNT},
{"BMI1", X264_CPU_BMI1},
{"BMI2", X264_CPU_BMI1|X264_CPU_BMI2},
{"ARMv6", X264_CPU_ARMV6},
{"NEON", X264_CPU_NEON},
{"FastNeonMRC", X264_CPU_FAST_NEON_MRC},
+#elif ARCH_AARCH64
+ {"ARMv8", X264_CPU_ARMV8},
+ {"NEON", X264_CPU_NEON},
+#elif ARCH_MIPS
+ {"MSA", X264_CPU_MSA},
#endif
{"", 0},
};
uint32_t cpu = 0;
uint32_t eax, ebx, ecx, edx;
uint32_t vendor[4] = {0};
- uint32_t max_extended_cap;
+ uint32_t max_extended_cap, max_basic_cap;
int cache;
#if !ARCH_X86_64
#endif
x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
- if( eax == 0 )
+ max_basic_cap = eax;
+ if( max_basic_cap == 0 )
return 0;
x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
}
}
- x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
- /* AVX2 requires OS support, but BMI1/2 don't. */
- if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
- cpu |= X264_CPU_AVX2;
- if( ebx&0x00000008 )
+ if( max_basic_cap >= 7 )
{
- cpu |= X264_CPU_BMI1;
- if( ebx&0x00000100 )
- cpu |= X264_CPU_BMI2;
+ x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+ /* AVX2 requires OS support, but BMI1/2 don't. */
+ if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+ cpu |= X264_CPU_AVX2;
+ if( ebx&0x00000008 )
+ {
+ cpu |= X264_CPU_BMI1;
+ if( ebx&0x00000100 )
+ cpu |= X264_CPU_BMI2;
+ }
}
if( cpu & X264_CPU_SSSE3 )
}
}
- if( ecx&0x00000080 ) /* Misalign SSE */
- {
- cpu |= X264_CPU_SSE_MISALIGN;
- x264_cpu_mask_misalign_sse();
- }
-
if( cpu & X264_CPU_AVX )
{
if( ecx&0x00000800 ) /* XOP */
x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
cache = ecx&0xff; // cacheline size
}
- if( !cache )
+ if( !cache && max_basic_cap >= 2 )
{
// Cache and TLB Information
static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
}
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
cpu |= X264_CPU_STACK_MOD4;
#endif
return cpu;
}
-#elif ARCH_PPC
+#elif ARCH_PPC && HAVE_ALTIVEC
-#if SYS_MACOSX || SYS_OPENBSD
+#if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
#include <sys/sysctl.h>
uint32_t x264_cpu_detect( void )
{
uint32_t cpu = 0;
#if SYS_OPENBSD
int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
-#else
+#elif SYS_MACOSX
int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
int has_altivec = 0;
size_t length = sizeof( has_altivec );
+#if SYS_MACOSX || SYS_OPENBSD
int error = sysctl( selectors, 2, &has_altivec, &length, NULL, 0 );
+#else
+ int error = sysctlbyname( "hw.altivec", &has_altivec, &length, NULL, 0 );
+#endif
if( error == 0 && has_altivec != 0 )
cpu |= X264_CPU_ALTIVEC;
uint32_t x264_cpu_detect( void )
{
+#ifdef __NO_FPRS__
+ return 0;
+#else
static void (*oldsig)( int );
oldsig = signal( SIGILL, sigill_handler );
signal( SIGILL, oldsig );
return X264_CPU_ALTIVEC;
+#endif
}
#endif
return flags;
}
+#elif ARCH_AARCH64
+
+uint32_t x264_cpu_detect( void )
+{
+ return X264_CPU_ARMV8 | X264_CPU_NEON;
+}
+
+#elif ARCH_MIPS
+
+uint32_t x264_cpu_detect( void )
+{
+ uint32_t flags = 0;
+#if HAVE_MSA
+ flags |= X264_CPU_MSA;
+#endif
+ return flags;
+}
+
#else
uint32_t x264_cpu_detect( void )
return sysconf( _SC_NPROCESSORS_ONLN );
#elif SYS_LINUX
+#ifdef __ANDROID__
+ // Android NDK does not expose sched_getaffinity
+ return sysconf( _SC_NPROCESSORS_CONF );
+#else
cpu_set_t p_aff;
memset( &p_aff, 0, sizeof(p_aff) );
if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
return np;
#endif
+#endif
#elif SYS_BEOS
system_info info;