git.sesse.net Git - vlc/blobdiff - src/misc/cpu.c
Improve x86 cpuid
[vlc] / src / misc / cpu.c
index e630e9b60263f6069ea075b8403d491779bfb043..69b471b2ca9444ca4dbae65c2679107b128d03db 100644 (file)
 #endif
 #include <assert.h>
 
-#include "libvlc.h"
-
-#if defined(__APPLE__)
+#ifdef __APPLE__
 #include <sys/sysctl.h>
 #endif
 
-#if defined(__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-#endif
+#include "libvlc.h"
 
-#if defined(__SunOS)
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/processor.h>
-#include <sys/pset.h>
-#endif
+static uint32_t cpu_flags;
 
 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
  || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
-# ifndef WIN32
+# if !defined( WIN32 ) && !defined( __OS2__ )
 static bool check_OS_capability( const char *psz_capability, pid_t pid )
 {
     int status;
@@ -84,31 +73,30 @@ static bool check_OS_capability( const char *psz_capability, pid_t pid )
     return false;
 }
 
-#  define check_capability(name, flag, code)   \
-     do {                                      \
-        pid_t pid = fork();                    \
-        if( pid == 0 )                         \
-        {                                      \
-            signal(SIGILL, SIG_DFL);           \
-            __asm__ __volatile__ ( code : : ); \
-            _exit(0);                          \
-        }                                      \
-        if( check_OS_capability((name), pid )) \
-            i_capabilities |= (flag);          \
+#  define check_capability(name, flag, code, input)     \
+     do {                                               \
+        pid_t pid = fork();                             \
+        if( pid == 0 )                                  \
+        {                                               \
+            signal(SIGILL, SIG_DFL);                    \
+            __asm__ __volatile__ ( code : : input );    \
+            _exit(0);                                   \
+        }                                               \
+        if( check_OS_capability((name), pid ))          \
+            i_capabilities |= (flag);                   \
      } while(0)
 
-# else /* WIN32 */
-#  define check_capability(name, flag, code)   \
+# else /* WIN32 || __OS2__ */
+#  define check_capability(name, flag, code, input)   \
         i_capabilities |= (flag);
 # endif
 #endif
 
-/*****************************************************************************
- * CPUCapabilities: get the CPU capabilities
- *****************************************************************************
- * This function is called to list extensions the CPU may have.
- *****************************************************************************/
-uint32_t CPUCapabilities( void )
+/**
+ * Determines the CPU capabilities and stores them in cpu_flags.
+ * The result can be retrieved with vlc_CPU().
+ */
+void vlc_CPU_init (void)
 {
     uint32_t i_capabilities = 0;
 
@@ -117,29 +105,21 @@ uint32_t CPUCapabilities( void )
      bool b_amd;
 
     /* Needed for x86 CPU capabilities detection */
-#   if defined( __x86_64__ )
-#       define cpuid( reg )                    \
-            asm volatile ( "cpuid\n\t"         \
-                           "movl %%ebx,%1\n\t" \
-                         : "=a" ( i_eax ),     \
-                           "=b" ( i_ebx ),     \
-                           "=c" ( i_ecx ),     \
-                           "=d" ( i_edx )      \
-                         : "a"  ( reg )        \
-                         : "cc" );
-#   else
-#       define cpuid( reg )                    \
-            asm volatile ( "push %%ebx\n\t"    \
-                           "cpuid\n\t"         \
-                           "movl %%ebx,%1\n\t" \
-                           "pop %%ebx\n\t"     \
-                         : "=a" ( i_eax ),     \
-                           "=r" ( i_ebx ),     \
-                           "=c" ( i_ecx ),     \
-                           "=d" ( i_edx )      \
-                         : "a"  ( reg )        \
-                         : "cc" );
-#   endif
+# if defined (__i386__) && defined (__PIC__)
+#  define cpuid(reg) \
+     asm volatile ("xchgl %%ebx,%1\n\t" \
+                   "cpuid\n\t" \
+                   "xchgl %%ebx,%1\n\t" \
+                   : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
+                   : "a" (reg) \
+                   : "cc");
+# else
+#  define cpuid(reg) \
+     asm volatile ("cpuid\n\t" \
+                   : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
+                   : "a" (reg) \
+                   : "cc");
+# endif
      /* Check if the OS really supports the requested instructions */
 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
   && !defined (__i686__) && !defined (__pentium4__) \
@@ -196,7 +176,7 @@ uint32_t CPUCapabilities( void )
 
 #   ifdef CAN_COMPILE_SSE
         check_capability( "SSE", CPU_CAPABILITY_SSE,
-                          "xorps %%xmm0,%%xmm0\n" );
+                          "xorps %%xmm0,%%xmm0\n", );
 #   endif
     }
 # endif
@@ -206,7 +186,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_SSE2)
     if( i_edx & 0x04000000 )
         check_capability( "SSE2", CPU_CAPABILITY_SSE2,
-                          "movupd %%xmm0, %%xmm0\n" );
+                          "movupd %%xmm0, %%xmm0\n", );
 # endif
 
 # if defined (__SSE3__)
@@ -214,7 +194,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_SSE3)
     if( i_ecx & 0x00000001 )
         check_capability( "SSE3", CPU_CAPABILITY_SSE3,
-                          "movsldup %%xmm1, %%xmm0\n" );
+                          "movsldup %%xmm1, %%xmm0\n", );
 # endif
 
 # if defined (__SSSE3__)
@@ -222,7 +202,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_SSSE3)
     if( i_ecx & 0x00000200 )
         check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
-                          "pabsw %%xmm1, %%xmm0\n" );
+                          "pabsw %%xmm1, %%xmm0\n", );
 # endif
 
 # if defined (__SSE4_1__)
@@ -230,7 +210,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_SSE4_1)
     if( i_ecx & 0x00080000 )
         check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
-                          "pmaxsb %%xmm1, %%xmm0\n" );
+                          "pmaxsb %%xmm1, %%xmm0\n", );
 # endif
 
 # if defined (__SSE4_2__)
@@ -238,7 +218,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_SSE4_2)
     if( i_ecx & 0x00100000 )
         check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
-                          "pcmpgtq %%xmm1, %%xmm0\n" );
+                          "pcmpgtq %%xmm1, %%xmm0\n", );
 # endif
 
     /* test for additional capabilities */
@@ -255,7 +235,7 @@ uint32_t CPUCapabilities( void )
 # elif defined (CAN_COMPILE_3DNOW)
     if( i_edx & 0x80000000 )
         check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
-                          "pfadd %%mm0,%%mm0\n" "femms\n" );
+                          "pfadd %%mm0,%%mm0\n" "femms\n", );
 # endif
 
     if( b_amd && ( i_edx & 0x00400000 ) )
@@ -264,10 +244,44 @@ uint32_t CPUCapabilities( void )
     }
 out:
 
-#elif defined( __arm__ )
-#   if defined( __ARM_NEON__ )
+#elif defined (__arm__)
+
+# if defined (__ARM_NEON__)
     i_capabilities |= CPU_CAPABILITY_NEON;
+# elif defined (CAN_COMPILE_NEON)
+#  define NEED_RUNTIME_CPU_CHECK 1
+# endif
+
+# ifdef NEED_RUNTIME_CPU_CHECK
+#  if defined (__linux__)
+    FILE *info = fopen ("/proc/cpuinfo", "rt");
+    if (info != NULL)
+    {
+        char *line = NULL;
+        size_t linelen = 0;
+
+        while (getline (&line, &linelen, info) != -1)
+        {
+            const char *cap;
+
+            if (strncmp (line, "Features\t:", 10))
+                continue;
+
+            /* TODO: detect other CPU features when we use them */
+#   if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__)
+                cap = strstr (line + 10, " neon");
+            if (cap != NULL && (cap[5] == '\0' || cap[5] == ' '))
+                i_capabilities |= CPU_CAPABILITY_NEON;
 #   endif
+            break;
+        }
+        fclose (info);
+        free (line);
+    }
+#  else
+#   warning Run-time CPU detection missing: optimizations disabled!
+#  endif
+# endif
 
 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
     || defined( __ppc64__ )
@@ -286,153 +300,76 @@ out:
         i_capabilities |= CPU_CAPABILITY_ALTIVEC;
 
 #   elif defined( CAN_COMPILE_ALTIVEC )
-    pid_t pid = fork();
-    if( pid == 0 )
-    {
-        signal(SIGILL, SIG_DFL);
-        asm volatile ("mtspr 256, %0\n\t"
-                      "vand %%v0, %%v0, %%v0"
-                      :
-                      : "r" (-1));
-        _exit(0);
-    }
-
-    if( check_OS_capability( "Altivec", pid ) )
-        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
+    check_capability( "Altivec", CPU_CAPABILITY_ALTIVEC,
+        "mtspr 256, %0\n\t"
+        "vand %%v0, %%v0, %%v0",
+                      "r" (-1));
 
 #   endif
 
 #endif
-    return i_capabilities;
-}
-
-uint32_t cpu_flags = 0;
 
+    cpu_flags = i_capabilities;
+}
 
-/*****************************************************************************
- * vlc_CPU: get pre-computed CPU capability flags
- ****************************************************************************/
+/**
+ * Retrieves pre-computed CPU capability flags
+ */
 unsigned vlc_CPU (void)
 {
+/* On Windows and OS/2,
+ * initialized from DllMain() and _DLL_InitTerm() respectively, instead */
+#if !defined(WIN32) && !defined(__OS2__)
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once (&once, vlc_CPU_init);
+#endif
     return cpu_flags;
 }
 
-const struct
+void vlc_CPU_dump (vlc_object_t *obj)
 {
-    uint32_t value;
-    char name[12];
-} cap_dirs[] = {
-#if defined ( __i386__ ) || defined ( __x86_64__ )
-    { CPU_CAPABILITY_MMX,     "mmx" },
-    { CPU_CAPABILITY_MMXEXT,  "mmxext" },
-    { CPU_CAPABILITY_3DNOW,   "3dnow" },
-    { CPU_CAPABILITY_SSE,     "sse" },
-#endif
-#if defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
-    { CPU_CAPABILITY_ALTIVEC, "altivec" },
-#endif
-#if defined (__arm__)
-    { CPU_CAPABILITY_NEON,    "arm_neon" },
+    const unsigned flags = vlc_CPU();
+    char buf[200], *p = buf;
+
+#define PRINT_CAPABILITY( capability, string ) \
+    if (flags & (capability)) \
+        p += sprintf (p, "%s ", (string) )
+
+#if defined (__i386__) || defined (__x86_64__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_MMX, "MMX");
+    PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
+    PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A,  "SSE4A");
+
+#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC, "AltiVec");
+
+#elif defined (__arm__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_NEON, "NEONv1");
+
 #endif
-};
 
-/**
- * Return the number of available logical CPU.
- */
-unsigned vlc_GetCPUCount(void)
-{
-#if defined(WIN32) && !defined(UNDER_CE)
-    DWORD process_mask;
-    DWORD system_mask;
-    if (!GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask))
-        return 1;
-
-    unsigned count = 0;
-    while (system_mask) {
-        count++;
-        system_mask >>= 1;
-    }
-    return count;
-#elif defined(HAVE_SCHED_GETAFFINITY)
-    cpu_set_t cpu;
-    CPU_ZERO(&cpu);
-    if (sched_getaffinity(0, sizeof(cpu), &cpu) < 0)
-        return 1;
-    unsigned count = 0;
-    for (unsigned i = 0; i < CPU_SETSIZE; i++)
-        count += CPU_ISSET(i, &cpu) != 0;
-    return count;
-#elif defined(__APPLE__)
-    int count;
-    size_t size = sizeof(count) ;
-    if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0))
-        return 1; /* Failure */
-    return count;
-#elif defined(__OpenBSD__)
-    int selectors[2] = { CTL_HW, HW_NCPU };
-    int count;
-    size_t size = sizeof(count) ;
-    if (sysctl(selectors, 2, &count, &size, NULL, 0))
-        return 1; /* Failure */
-    return count;
-#elif defined(__SunOS)
-    unsigned count = 0;
-    int type;
-    u_int numcpus;
-    processorid_t *cpulist;
-    processor_info_t cpuinfo;
-    cpulist = malloc(sizeof(processorid_t) * sysconf(_SC_NPROCESSORS_MAX));
-    if (!cpulist) return 1;
-    if (pset_info(PS_MYID, &type, &numcpus, cpulist)==0)
-    {
-        for (u_int i = 0; i < numcpus; i++)
-        {
-            if (!processor_info(cpulist[i], &cpuinfo))
-                count += (cpuinfo.pi_state == P_ONLINE)?1:0;
-        }
-    } else {
-        count = sysconf(_SC_NPROCESSORS_ONLN);
-    }
-    free(cpulist);
-    return (count>0)?count:1;
-#else
-#   warning "vlc_GetCPUCount is not implemented for your platform"
-    return 1;
+#if HAVE_FPU
+    p += sprintf (p, "FPU ");
 #endif
-}
 
-/**
- * Check if a directory name contains usable plugins w.r.t. the hardware
- * capabilities. Loading a plugin when the hardware has insufficient
- * capabilities may lead to illegal instructions (SIGILL) and must be avoided.
- *
- * @param name the name of the directory (<b>not</b> the path)
- *
- * @return true if the hardware has sufficient capabilities or the directory
- * does not require any special capability; false if the running hardware has
- * insufficient capabilities.
- */
-bool vlc_CPU_CheckPluginDir (const char *name)
-{
-    const unsigned flags = vlc_CPU ();
-    for (size_t i = 0; i < sizeof (cap_dirs) / sizeof (cap_dirs[0]); i++)
-    {
-        if (strcmp (name, cap_dirs[i].name))
-            continue;
-        return (flags & cap_dirs[i].value) != 0;
-    }
-    return true;
+    if (p > buf)
+        msg_Dbg (obj, "CPU has capabilities %s", buf);
 }
 
+
 static vlc_memcpy_t pf_vlc_memcpy = memcpy;
-static vlc_memset_t pf_vlc_memset = memset;
 
-void vlc_fastmem_register (vlc_memcpy_t cpy, vlc_memset_t set)
+void vlc_fastmem_register (vlc_memcpy_t cpy)
 {
-    if (cpy)
-        pf_vlc_memcpy = cpy;
-    if (set)
-        pf_vlc_memset = set;
+    assert (cpy != NULL);
+    pf_vlc_memcpy = cpy;
 }
 
 /**
@@ -442,40 +379,3 @@ void *vlc_memcpy (void *tgt, const void *src, size_t n)
 {
     return pf_vlc_memcpy (tgt, src, n);
 }
-
-/**
- * vlc_memset: fast CPU-dependent memset
- */
-void *vlc_memset (void *tgt, int c, size_t n)
-{
-    return pf_vlc_memset (tgt, c, n);
-}
-
-/**
- * Returned an aligned pointer on newly allocated memory.
- * \param alignment must be a power of 2 and a multiple of sizeof(void*)
- * \param size is the size of the usable memory returned.
- *
- * It must not be freed directly, *base must.
- */
-void *vlc_memalign(void **base, size_t alignment, size_t size)
-{
-    assert(alignment >= sizeof(void*));
-    for (size_t t = alignment; t > 1; t >>= 1)
-        assert((t&1) == 0);
-#if defined(HAVE_POSIX_MEMALIGN)
-    if (posix_memalign(base, alignment, size)) {
-        *base = NULL;
-        return NULL;
-    }
-    return *base;
-#elif defined(HAVE_MEMALIGN)
-    return *base = memalign(alignment, size);
-#else
-    unsigned char *p = *base = malloc(size + alignment - 1);
-    if (!p)
-        return NULL;
-    return (void*)((uintptr_t)(p + alignment - 1) & ~(alignment - 1));
-#endif
-}
-