/*****************************************************************************
* cpu.c: CPU detection code
*****************************************************************************
- * Copyright (C) 1998-2002 VideoLAN
- * $Id: cpu.c,v 1.5 2002/08/19 11:13:45 sam Exp $
+ * Copyright (C) 1998-2004 the VideoLAN team
+ * $Id$
*
* Authors: Samuel Hocevar <sam@zoy.org>
* Christophe Massiot <massiot@via.ecp.fr>
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
-#include <signal.h> /* SIGHUP, SIGINT, SIGKILL */
-#include <setjmp.h> /* longjmp, setjmp */
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
-#include <vlc/vlc.h>
+#include <vlc_common.h>
+#include <vlc_cpu.h>
-#ifdef SYS_DARWIN
-# include <mach/mach.h> /* AltiVec detection */
-# include <mach/mach_error.h> /* some day the header files||compiler *
- will define it for us */
-# include <mach/bootstrap.h>
+#include <sys/types.h>
+#ifndef WIN32
+#include <errno.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <signal.h>
+#else
+#include <errno.h>
#endif
+#include <assert.h>
-#include "vlc_cpu.h"
+#include "libvlc.h"
-/*****************************************************************************
- * Local prototypes
- *****************************************************************************/
-static void SigHandler ( int );
-static u32 Capabilities ( vlc_object_t * );
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
-/*****************************************************************************
- * Global variables - they're needed for signal handling
- *****************************************************************************/
-static jmp_buf env;
-static int i_illegal;
-#if defined( __i386__ )
-static char *psz_capability;
+#if defined(__OpenBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
#endif
-/*****************************************************************************
- * CPUCapabilities: get the CPU capabilities
- *****************************************************************************
- * This function is a wrapper around Capabilities().
- *****************************************************************************/
-u32 __CPUCapabilities( vlc_object_t *p_this )
+#if defined(__SunOS)
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/processor.h>
+#include <sys/pset.h>
+#endif
+
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
+ || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
+# ifndef WIN32
+/**
+ * Collects the outcome of a forked instruction probe.
+ *
+ * @param psz_capability name of the instruction set, used in the warning only
+ * @param pid value returned by fork() in the parent (-1 if fork() failed)
+ *
+ * @return true if the child exited normally with status 0; false if fork()
+ * failed, if the child could not be waited for, or if it ended any other way
+ * (a warning is printed on stderr in that last case).
+ */
+static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
- u32 i_capabilities;
+    int status;
- vlc_mutex_lock( p_this->p_vlc->p_global_lock );
- i_capabilities = Capabilities( p_this );
- vlc_mutex_unlock( p_this->p_vlc->p_global_lock );
-
- return i_capabilities;
+    if( pid == -1 )
+        return false; /* fail safe :-/ */
+
+    /* Retry only when interrupted by a signal. Any other failure (such as
+     * ECHILD) would repeat forever and leave status unset, so give up. */
+    while( waitpid( pid, &status, 0 ) == -1 )
+    {
+        if( errno != EINTR )
+            return false;
+    }
+
+    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
+        return true;
+
+    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
+                     "operating system.\n", psz_capability );
+    fprintf( stderr, " some optimizations will be disabled unless "
+                     "you upgrade your OS\n" );
+    return false;
}
+/* Runs `code` in a forked child with the default SIGILL action restored and
+ * sets `flag` in i_capabilities only if the child exits cleanly. */
+# define check_capability(name, flag, code) \
+    do { \
+        pid_t pid = fork(); \
+        if( pid == 0 ) \
+        { \
+            signal(SIGILL, SIG_DFL); \
+            __asm__ __volatile__ ( code : : ); \
+            _exit(0); \
+        } \
+        if( check_OS_capability((name), pid )) \
+            i_capabilities |= (flag); \
+    } while(0)
+
+# else /* WIN32 */
+/* No probing on Windows: the flag is set unconditionally. */
+# define check_capability(name, flag, code) \
+    do { \
+        i_capabilities |= (flag); \
+    } while(0)
+# endif
+#endif
+
/*****************************************************************************
- * Capabilities: list the processors MMX support and other capabilities
+ * CPUCapabilities: get the CPU capabilities
 *****************************************************************************
* This function is called to list extensions the CPU may have.
+ * It returns a bitmask of CPU_CAPABILITY_* flags (0 when nothing is found).
+ * Instruction sets the compiler already targets (__SSE__, __SSE2__, ...) are
+ * reported without any runtime check; the others are looked up with CPUID
+ * and, where a CAN_COMPILE_* macro allows it, probed via check_capability().
*****************************************************************************/
-static u32 Capabilities( vlc_object_t *p_this )
+uint32_t CPUCapabilities( void )
{
- volatile u32 i_capabilities = CPU_CAPABILITY_NONE;
+    uint32_t i_capabilities = 0;
-#if defined( SYS_DARWIN )
- struct host_basic_info hi;
- kern_return_t ret;
- host_name_port_t host;
-
- int i_size;
- char *psz_name, *psz_subname;
-
- i_capabilities |= CPU_CAPABILITY_FPU;
-
- /* Should 'never' fail? */
- host = mach_host_self();
-
- i_size = sizeof( hi ) / sizeof( int );
- ret = host_info( host, HOST_BASIC_INFO, ( host_info_t )&hi, &i_size );
-
- if( ret != KERN_SUCCESS )
- {
- fprintf( stderr, "error: couldn't get CPU information\n" );
- return i_capabilities;
- }
-
- slot_name( hi.cpu_type, hi.cpu_subtype, &psz_name, &psz_subname );
- /* FIXME: need better way to detect newer proccessors.
- * could do strncmp(a,b,5), but that's real ugly */
- if( !strcmp(psz_name, "ppc7400") || !strcmp(psz_name, "ppc7450") )
- {
- i_capabilities |= CPU_CAPABILITY_ALTIVEC;
- }
-
- return i_capabilities;
-
-#elif defined( __i386__ )
- volatile unsigned int i_eax, i_ebx, i_ecx, i_edx;
- volatile vlc_bool_t b_amd;
+#if defined( __i386__ ) || defined( __x86_64__ )
+    unsigned int i_eax, i_ebx, i_ecx, i_edx;
+    bool b_amd;
/* Needed for x86 CPU capabilities detection */
-# define cpuid( a ) \
- asm volatile ( "pushl %%ebx\n\t" \
- "cpuid\n\t" \
- "movl %%ebx,%1\n\t" \
- "popl %%ebx\n\t" \
- : "=a" ( i_eax ), \
- "=r" ( i_ebx ), \
- "=c" ( i_ecx ), \
- "=d" ( i_edx ) \
- : "a" ( a ) \
- : "cc" );
-
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- sighandler_t pf_sigill = signal( SIGILL, SigHandler );
+# if defined( __x86_64__ )
+# define cpuid( reg ) \
+    asm volatile ( "cpuid\n\t" \
+                   "movl %%ebx,%1\n\t" \
+                 : "=a" ( i_eax ), \
+                   "=b" ( i_ebx ), \
+                   "=c" ( i_ecx ), \
+                   "=d" ( i_edx ) \
+                 : "a" ( reg ) \
+                 : "cc" );
+# else
+# define cpuid( reg ) \
+    asm volatile ( "push %%ebx\n\t" \
+                   "cpuid\n\t" \
+                   "movl %%ebx,%1\n\t" \
+                   "pop %%ebx\n\t" \
+                 : "=a" ( i_eax ), \
+                   "=r" ( i_ebx ), \
+                   "=c" ( i_ecx ), \
+                   "=d" ( i_edx ) \
+                 : "a" ( reg ) \
+                 : "cc" );
# endif
-
- i_capabilities |= CPU_CAPABILITY_FPU;
-
- /* test for a 486 CPU */
- asm volatile ( "pushl %%ebx\n\t"
- "pushfl\n\t"
- "popl %%eax\n\t"
+    /* Check if the OS really supports the requested instructions */
+# if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
+ && !defined (__i686__) && !defined (__pentium4__) \
+ && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
+    /* check if cpuid instruction is supported */
+    asm volatile ( "push %%ebx\n\t"
+                   "pushf\n\t"
+                   "pop %%eax\n\t"
"movl %%eax, %%ebx\n\t"
"xorl $0x200000, %%eax\n\t"
- "pushl %%eax\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %%eax\n\t"
+                   "push %%eax\n\t"
+                   "popf\n\t"
+                   "pushf\n\t"
+                   "pop %%eax\n\t"
"movl %%ebx,%1\n\t"
- "popl %%ebx\n\t"
+                   "pop %%ebx\n\t"
: "=a" ( i_eax ),
"=r" ( i_ebx )
:
: "cc" );
if( i_eax == i_ebx )
- {
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- signal( SIGILL, pf_sigill );
-# endif
- return i_capabilities;
- }
-
- i_capabilities |= CPU_CAPABILITY_486;
+        goto out;
+# endif
/* the CPU supports the CPUID instruction - get its level */
cpuid( 0x00000000 );
+# if defined (__i386__) && !defined (__i586__) \
+ && !defined (__i686__) && !defined (__pentium4__) \
+ && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
if( !i_eax )
- {
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- signal( SIGILL, pf_sigill );
-# endif
- return i_capabilities;
- }
-
- /* FIXME: this isn't correct, since some 486s have cpuid */
- i_capabilities |= CPU_CAPABILITY_586;
+        goto out;
#endif
/* borrowed from mpeg2dec */
+    /* Vendor string "AuthenticAMD": EBX = "Auth", EDX = "enti", ECX = "cAMD" */
b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
&& ( i_edx == 0x69746e65 );
/* test for the MMX flag */
cpuid( 0x00000001 );
-
+# if !defined (__MMX__)
if( ! (i_edx & 0x00800000) )
- {
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- signal( SIGILL, pf_sigill );
-# endif
- return i_capabilities;
- }
-
+        goto out;
+# endif
i_capabilities |= CPU_CAPABILITY_MMX;
+# if defined (__SSE__)
+    i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
+# else
if( i_edx & 0x02000000 )
{
i_capabilities |= CPU_CAPABILITY_MMXEXT;
# ifdef CAN_COMPILE_SSE
- /* We test if OS supports the SSE instructions */
- psz_capability = "SSE";
- i_illegal = 0;
-
- if( setjmp( env ) == 0 )
- {
- /* Test a SSE instruction */
- __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : );
- }
-
- if( i_illegal == 0 )
- {
- i_capabilities |= CPU_CAPABILITY_SSE;
- }
+        check_capability( "SSE", CPU_CAPABILITY_SSE,
+                          "xorps %%xmm0,%%xmm0\n" );
# endif
}
+# endif
+
+    /* The SSE2..SSE4.2 tests below still use EDX/ECX from CPUID leaf 1 */
+# if defined (__SSE2__)
+    i_capabilities |= CPU_CAPABILITY_SSE2;
+# elif defined (CAN_COMPILE_SSE2)
+    if( i_edx & 0x04000000 )
+        check_capability( "SSE2", CPU_CAPABILITY_SSE2,
+                          "movupd %%xmm0, %%xmm0\n" );
+# endif
+
+# if defined (__SSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSE3;
+# elif defined (CAN_COMPILE_SSE3)
+    if( i_ecx & 0x00000001 )
+        check_capability( "SSE3", CPU_CAPABILITY_SSE3,
+                          "movsldup %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSSE3;
+# elif defined (CAN_COMPILE_SSSE3)
+    if( i_ecx & 0x00000200 )
+        check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
+                          "pabsw %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_1__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_1;
+# elif defined (CAN_COMPILE_SSE4_1)
+    if( i_ecx & 0x00080000 )
+        check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
+                          "pmaxsb %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_2__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_2;
+# elif defined (CAN_COMPILE_SSE4_2)
+    if( i_ecx & 0x00100000 )
+        check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
+                          "pcmpgtq %%xmm1, %%xmm0\n" );
+# endif
/* test for additional capabilities */
cpuid( 0x80000000 );
if( i_eax < 0x80000001 )
- {
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- signal( SIGILL, pf_sigill );
-# endif
- return i_capabilities;
- }
+        goto out;
/* list these additional capabilities */
cpuid( 0x80000001 );
-# ifdef CAN_COMPILE_3DNOW
+# if defined (__3dNOW__)
+    i_capabilities |= CPU_CAPABILITY_3DNOW;
+# elif defined (CAN_COMPILE_3DNOW)
if( i_edx & 0x80000000 )
- {
- psz_capability = "3D Now!";
- i_illegal = 0;
-
- if( setjmp( env ) == 0 )
- {
- /* Test a 3D Now! instruction */
- __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : );
- }
-
- if( i_illegal == 0 )
- {
- i_capabilities |= CPU_CAPABILITY_3DNOW;
- }
- }
-# endif
+        check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
+                          "pfadd %%mm0,%%mm0\n" "femms\n" );
+# endif
if( b_amd && ( i_edx & 0x00400000 ) )
{
i_capabilities |= CPU_CAPABILITY_MMXEXT;
}
+out:
-# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW )
- signal( SIGILL, pf_sigill );
+#elif defined( __arm__ )
+# if defined( __ARM_NEON__ )
+    i_capabilities |= CPU_CAPABILITY_NEON;
# endif
- return i_capabilities;
-#elif defined( __powerpc__ )
+#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
+ || defined( __ppc64__ )
-# ifdef CAN_COMPILE_ALTIVEC
- sighandler_t pf_sigill = signal( SIGILL, SigHandler );
-
- i_capabilities |= CPU_CAPABILITY_FPU;
+# if defined(__APPLE__) || defined(__OpenBSD__)
+    /* Ask the kernel through sysctl() instead of executing a probe */
+# if defined(__OpenBSD__)
+    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
+# else
+    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
+# endif
+    int i_has_altivec = 0;
+    size_t i_length = sizeof( i_has_altivec );
+    int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
- i_illegal = 0;
+    if( i_error == 0 && i_has_altivec != 0 )
+        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
- if( setjmp( env ) == 0 )
+# elif defined( CAN_COMPILE_ALTIVEC )
+    /* Run an AltiVec instruction in a child so SIGILL cannot kill us */
+    pid_t pid = fork();
+    if( pid == 0 )
{
+        signal(SIGILL, SIG_DFL);
asm volatile ("mtspr 256, %0\n\t"
"vand %%v0, %%v0, %%v0"
:
: "r" (-1));
+        _exit(0);
}
- if( i_illegal == 0 )
- {
+    if( check_OS_capability( "Altivec", pid ) )
i_capabilities |= CPU_CAPABILITY_ALTIVEC;
- }
- signal( SIGILL, pf_sigill );
# endif
+#endif
return i_capabilities;
+}
-#elif defined( __sparc__ )
+/* Capability bitmask handed out by vlc_CPU(); it is zero until assigned.
+ * NOTE(review): presumably filled from CPUCapabilities() during start-up --
+ * the assignment is not visible in this part of the file; confirm. */
+uint32_t cpu_flags = 0;
- i_capabilities |= CPU_CAPABILITY_FPU;
- return i_capabilities;
-#else
- /* default behaviour */
- return i_capabilities;
+/*****************************************************************************
+ * vlc_CPU: get pre-computed CPU capability flags
+ *****************************************************************************
+ * Returns the current value of cpu_flags; no detection is performed here.
+ ****************************************************************************/
+unsigned vlc_CPU (void)
+{
+ return cpu_flags;
+}
+/**
+ * Directory names that are tied to one CPU capability flag; this table is
+ * scanned by vlc_CPU_CheckPluginDir(). It is file-local: the struct type is
+ * anonymous, so no other translation unit could declare it anyway.
+ */
+static const struct
+{
+    uint32_t value; /* CPU_CAPABILITY_* flag required by the directory */
+    char name[12];  /* directory name (not a path) */
+} cap_dirs[] = {
+#if defined ( __i386__ ) || defined ( __x86_64__ )
+    { CPU_CAPABILITY_MMX, "mmx" },
+    { CPU_CAPABILITY_MMXEXT, "mmxext" },
+    { CPU_CAPABILITY_3DNOW, "3dnow" },
+    { CPU_CAPABILITY_SSE, "sse" },
+#endif
+#if defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__) \
+ || defined (__powerpc64__)
+    { CPU_CAPABILITY_ALTIVEC, "altivec" },
+#endif
+#if defined (__arm__)
+    { CPU_CAPABILITY_NEON, "arm_neon" },
+#endif
+};
+
+
+/**
+ * Return the number of available logical CPU.
+ *
+ * @return the CPU count reported by the platform, or 1 if it cannot be
+ * determined (query failure, non-positive result, or unsupported platform).
+ */
+unsigned vlc_GetCPUCount(void)
+{
+#if defined(WIN32) && !defined(UNDER_CE)
+    /* The affinity masks are pointer-sized, one bit per logical CPU */
+    DWORD_PTR process_mask;
+    DWORD_PTR system_mask;
+    if (!GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask))
+        return 1;
+
+    /* Count the set bits, not the position of the highest one */
+    unsigned count = 0;
+    while (system_mask) {
+        count += (unsigned)(system_mask & 1);
+        system_mask >>= 1;
+    }
+    return (count > 0) ? count : 1;
+#elif defined(HAVE_SCHED_GETAFFINITY)
+    cpu_set_t cpu;
+    CPU_ZERO(&cpu);
+    if (sched_getaffinity(0, sizeof(cpu), &cpu) < 0)
+        return 1;
+    unsigned count = 0;
+    for (unsigned i = 0; i < CPU_SETSIZE; i++)
+        count += CPU_ISSET(i, &cpu) != 0;
+    return count;
+#elif defined(__APPLE__)
+    int count;
+    size_t size = sizeof(count) ;
+    if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0))
+        return 1; /* Failure */
+    return (count > 0) ? (unsigned)count : 1;
+#elif defined(__OpenBSD__)
+    int selectors[2] = { CTL_HW, HW_NCPU };
+    int count;
+    size_t size = sizeof(count) ;
+    if (sysctl(selectors, 2, &count, &size, NULL, 0))
+        return 1; /* Failure */
+    return (count > 0) ? (unsigned)count : 1;
+#elif defined(__SunOS)
+    unsigned count = 0;
+    int type;
+    processor_info_t cpuinfo;
+    /* sysconf() returns -1 on error; never feed that to malloc() */
+    const long max_cpus = sysconf(_SC_NPROCESSORS_MAX);
+    if (max_cpus <= 0)
+        return 1;
+    /* NOTE(review): pset_info() appears to read numcpus as the capacity of
+     * cpulist on entry -- confirm against the Solaris manual page. */
+    u_int numcpus = (u_int)max_cpus;
+    processorid_t *cpulist = malloc(sizeof(*cpulist) * (size_t)max_cpus);
+    if (!cpulist)
+        return 1;
+    if (pset_info(PS_MYID, &type, &numcpus, cpulist) == 0)
+    {
+        for (u_int i = 0; i < numcpus; i++)
+        {
+            if (!processor_info(cpulist[i], &cpuinfo))
+                count += (cpuinfo.pi_state == P_ONLINE) ? 1 : 0;
+        }
+    }
+    else
+    {
+        const long online = sysconf(_SC_NPROCESSORS_ONLN);
+        if (online > 0)
+            count = (unsigned)online;
+    }
+    free(cpulist);
+    return (count > 0) ? count : 1;
+#else
+# warning "vlc_GetCPUCount is not implemented for your platform"
+    return 1;
#endif
}
-/*****************************************************************************
- * SigHandler: system signal handler
- *****************************************************************************
- * This function is called when an illegal instruction signal is received by
- * the program. We use this function to test OS and CPU capabilities
- *****************************************************************************/
-static void SigHandler( int i_signal )
+/**
+ * Tell whether plugins found in a given directory may be loaded on this
+ * machine. A directory listed in cap_dirs is usable only when the matching
+ * capability bit is present in vlc_CPU(); loading code built for missing
+ * instructions could raise SIGILL.
+ *
+ * @param name the name of the directory (<b>not</b> the path)
+ *
+ * @return false if the directory is tied to a capability the CPU lacks;
+ * true otherwise, including for directories that are not listed at all.
+ */
+bool vlc_CPU_CheckPluginDir (const char *name)
{
- /* Acknowledge the signal received */
- i_illegal = 1;
+    const unsigned available = vlc_CPU ();
+    const size_t entries = sizeof (cap_dirs) / sizeof (cap_dirs[0]);
+
+    for (size_t idx = 0; idx < entries; idx++)
+    {
+        /* the first entry with this exact name decides */
+        if (strcmp (name, cap_dirs[idx].name) == 0)
+            return (available & cap_dirs[idx].value) != 0;
+    }
+    return true;
+}
-#ifdef HAVE_SIGRELSE
- sigrelse( i_signal );
-#endif
+/* Routines behind vlc_memcpy() and vlc_memset(); the C library versions are
+ * used until vlc_fastmem_register() installs replacements. */
+static vlc_memcpy_t pf_vlc_memcpy = memcpy;
+static vlc_memset_t pf_vlc_memset = memset;
-#if defined( __i386__ )
- fprintf( stderr, "warning: your CPU has %s instructions, but not your "
- "operating system.\n", psz_capability );
- fprintf( stderr, " some optimizations will be disabled unless "
- "you upgrade your OS\n" );
-# if defined( SYS_LINUX )
- fprintf( stderr, " (for instance Linux kernel 2.4.x or later)\n" );
-# endif
-#endif
+/**
+ * Install replacement copy and fill routines.
+ * A NULL argument leaves the corresponding routine unchanged.
+ */
+void vlc_fastmem_register (vlc_memcpy_t cpy, vlc_memset_t set)
+{
+    if (cpy != NULL)
+    {
+        pf_vlc_memcpy = cpy;
+    }
+    if (set != NULL)
+    {
+        pf_vlc_memset = set;
+    }
+}
- longjmp( env, 1 );
+/**
+ * vlc_memcpy: copy n bytes from src to tgt through the routine currently
+ * stored in pf_vlc_memcpy, and return whatever that routine returns
+ */
+void *vlc_memcpy (void *tgt, const void *src, size_t n)
+{
+    const vlc_memcpy_t copy_fn = pf_vlc_memcpy;
+
+    return copy_fn (tgt, src, n);
+}
+
+/**
+ * vlc_memset: fill n bytes at tgt with c through the routine currently
+ * stored in pf_vlc_memset, and return whatever that routine returns
+ */
+void *vlc_memset (void *tgt, int c, size_t n)
+{
+    const vlc_memset_t fill_fn = pf_vlc_memset;
+
+    return fill_fn (tgt, c, n);
+}
+
+/**
+ * Returns an aligned pointer on newly allocated memory.
+ * \param base receives the pointer that must later be passed to free();
+ *             it is set to NULL when the allocation fails
+ * \param alignment must be a power of 2 and a multiple of sizeof(void*)
+ * \param size is the size of the usable memory returned.
+ * \return the aligned pointer, or NULL on failure
+ *
+ * It must not be freed directly, *base must.
+ */
+void *vlc_memalign(void **base, size_t alignment, size_t size)
+{
+    assert(alignment >= sizeof(void*));
+    /* a power of two has exactly one bit set */
+    assert((alignment & (alignment - 1)) == 0);
+#if defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(base, alignment, size)) {
+        *base = NULL;
+        return NULL;
+    }
+    return *base;
+#elif defined(HAVE_MEMALIGN)
+    return *base = memalign(alignment, size);
+#else
+    /* Over-allocate by alignment - 1 bytes and round the start up. Refuse
+     * sizes for which that sum would wrap around and under-allocate. */
+    if (size > SIZE_MAX - (alignment - 1)) {
+        *base = NULL;
+        return NULL;
+    }
+    unsigned char *p = *base = malloc(size + alignment - 1);
+    if (!p)
+        return NULL;
+    return (void*)((uintptr_t)(p + alignment - 1) & ~(alignment - 1));
+#endif
}