]> git.sesse.net Git - vlc/blobdiff - src/misc/cpu.c
Improve x86 cpuid
[vlc] / src / misc / cpu.c
index 4028b16834638a96fd6a44a868f1dc85d94baa5d..69b471b2ca9444ca4dbae65c2679107b128d03db 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
 /*****************************************************************************
  * Preamble
  *****************************************************************************/
-#include <vlc/vlc.h>
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
 
-#ifdef HAVE_SIGNAL_H
-#   include <signal.h>                            /* SIGHUP, SIGINT, SIGKILL */
-#   include <setjmp.h>                                    /* longjmp, setjmp */
+#include <sys/types.h>
+#ifndef WIN32
+#include <unistd.h>
+#include <sys/wait.h>
+#include <signal.h>
+#else
+#include <errno.h>
 #endif
+#include <assert.h>
 
-#ifdef SYS_DARWIN
+#ifdef __APPLE__
 #include <sys/sysctl.h>
 #endif
 
-#include "vlc_cpu.h"
+#include "libvlc.h"
 
-/*****************************************************************************
- * Local prototypes
- *****************************************************************************/
-#ifdef HAVE_SIGNAL_H
-static void SigHandler   ( int );
-#endif
+static uint32_t cpu_flags;
 
-/*****************************************************************************
- * Global variables - they're needed for signal handling
- *****************************************************************************/
-#ifdef HAVE_SIGNAL_H
-static jmp_buf env;
-static int     i_illegal;
-#if defined( __i386__ ) || defined( __x86_64__ )
-static char   *psz_capability;
-#endif
-#endif
-
-/*****************************************************************************
- * CPUCapabilities: get the CPU capabilities
- *****************************************************************************
- * This function is called to list extensions the CPU may have.
- *****************************************************************************/
-uint32_t CPUCapabilities( void )
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
+ || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
+# if !defined( WIN32 ) && !defined( __OS2__ )
+static bool check_OS_capability( const char *psz_capability, pid_t pid ) /* true only if child `pid` exited normally with status 0; psz_capability is only used in the warning text */
 {
-    volatile uint32_t i_capabilities = CPU_CAPABILITY_NONE;
+    int status;
 
-#if defined( SYS_DARWIN )
-    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-    int i_has_altivec = 0;
-    size_t i_length = sizeof( i_has_altivec );
-    int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
+    if( pid == -1 ) /* no child to wait for (check_capability passes the fork() result here) */
+        return false; /* fail safe :-/ */
 
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
-    if( i_error == 0 && i_has_altivec != 0 )
-        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
+    while( waitpid( pid, &status, 0 ) == -1 ); /* retry until the child is reaped; NOTE(review): retries on any error, not just EINTR - confirm this cannot spin */
 
-    return i_capabilities;
+    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
+        return true;
 
-#elif defined( __i386__ ) || defined( __x86_64__ )
-    volatile unsigned int  i_eax, i_ebx, i_ecx, i_edx;
-    volatile vlc_bool_t    b_amd;
+    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
+                     "operating system.\n", psz_capability );
+    fprintf( stderr, "         some optimizations will be disabled unless "
+                     "you upgrade your OS\n" );
+    return false; /* any other outcome (non-zero exit, termination by a signal) is reported as unsupported */
+}
 
-    /* Needed for x86 CPU capabilities detection */
-#   if defined( __x86_64__ )
-#       define cpuid( reg )                    \
-            asm volatile ( "push %%rbx\n\t"    \
-                           "cpuid\n\t"         \
-                           "movl %%ebx,%1\n\t" \
-                           "pop %%rbx\n\t"     \
-                         : "=a" ( i_eax ),     \
-                           "=r" ( i_ebx ),     \
-                           "=c" ( i_ecx ),     \
-                           "=d" ( i_edx )      \
-                         : "a"  ( reg )        \
-                         : "cc" );
-#   else
-#       define cpuid( reg )                    \
-            asm volatile ( "push %%ebx\n\t"    \
-                           "cpuid\n\t"         \
-                           "movl %%ebx,%1\n\t" \
-                           "pop %%ebx\n\t"     \
-                         : "=a" ( i_eax ),     \
-                           "=r" ( i_ebx ),     \
-                           "=c" ( i_ecx ),     \
-                           "=d" ( i_edx )      \
-                         : "a"  ( reg )        \
-                         : "cc" );
-#   endif
+#  define check_capability(name, flag, code, input)     \
+     do {                                               \
+        pid_t pid = fork(); /* run the probe in a child process */ \
+        if( pid == 0 )                                  \
+        {                                               \
+            signal(SIGILL, SIG_DFL); /* default SIGILL action in the child */ \
+            __asm__ __volatile__ ( code : : input );    \
+            _exit(0); /* status 0 is what check_OS_capability() treats as success */ \
+        }                                               \
+        if( check_OS_capability((name), pid ))          \
+            i_capabilities |= (flag);                   \
+     } while(0)
+/* Both variants OR `flag` into a variable named i_capabilities that must exist at the expansion site. NOTE(review): the variant below ends in ';' and is not wrapped in do { } while(0), unlike the one above. */
+# else /* WIN32 || __OS2__: no run-time probe, the flag is set unconditionally */
+#  define check_capability(name, flag, code, input)   \
+        i_capabilities |= (flag);
+# endif
+#endif
 
-#   if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-     && defined( HAVE_SIGNAL_H )
-    void (*pf_sigill) (int) = signal( SIGILL, SigHandler );
-#   endif
+/**
+ * Determines the CPU capabilities and stores them in cpu_flags.
+ * The result can be retrieved with vlc_CPU().
+ */
+void vlc_CPU_init (void)
+{
+    uint32_t i_capabilities = 0;
 
-    i_capabilities |= CPU_CAPABILITY_FPU;
+#if defined( __i386__ ) || defined( __x86_64__ )
+     unsigned int i_eax, i_ebx, i_ecx, i_edx;
+     bool b_amd;
 
-#   if defined( __i386__ )
+    /* Needed for x86 CPU capabilities detection */
+# if defined (__i386__) && defined (__PIC__)
+#  define cpuid(reg) \
+     asm volatile ("xchgl %%ebx,%1\n\t" \
+                   "cpuid\n\t" \
+                   "xchgl %%ebx,%1\n\t" \
+                   : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
+                   : "a" (reg) \
+                   : "cc");
+# else
+#  define cpuid(reg) \
+     asm volatile ("cpuid\n\t" \
+                   : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
+                   : "a" (reg) \
+                   : "cc");
+# endif
+     /* Check if the OS really supports the requested instructions */
+# if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
+  && !defined (__i686__) && !defined (__pentium4__) \
+  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
     /* check if cpuid instruction is supported */
     asm volatile ( "push %%ebx\n\t"
                    "pushf\n\t"
@@ -136,33 +142,18 @@ uint32_t CPUCapabilities( void )
                  : "cc" );
 
     if( i_eax == i_ebx )
-    {
-#       if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-            && defined( HAVE_SIGNAL_H )
-        signal( SIGILL, pf_sigill );
-#       endif
-        return i_capabilities;
-    }
-#   else
-    /* x86_64 supports cpuid instruction, so we dont need to check it */
-#   endif
-
-    i_capabilities |= CPU_CAPABILITY_486;
+        goto out;
+# endif
 
     /* the CPU supports the CPUID instruction - get its level */
     cpuid( 0x00000000 );
 
+# if defined (__i386__) && !defined (__i586__) \
+  && !defined (__i686__) && !defined (__pentium4__) \
+  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
     if( !i_eax )
-    {
-#   if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-     && defined( HAVE_SIGNAL_H )
-        signal( SIGILL, pf_sigill );
-#   endif
-        return i_capabilities;
-    }
-
-    /* FIXME: this isn't correct, since some 486s have cpuid */
-    i_capabilities |= CPU_CAPABILITY_586;
+        goto out;
+#endif
 
     /* borrowed from mpeg2dec */
     b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
@@ -170,175 +161,221 @@ uint32_t CPUCapabilities( void )
 
     /* test for the MMX flag */
     cpuid( 0x00000001 );
-
+# if !defined (__MMX__)
     if( ! (i_edx & 0x00800000) )
-    {
-#   if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-     && defined( HAVE_SIGNAL_H )
-        signal( SIGILL, pf_sigill );
-#   endif
-        return i_capabilities;
-    }
-
+        goto out;
+# endif
     i_capabilities |= CPU_CAPABILITY_MMX;
 
+# if defined (__SSE__)
+    i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
+# else
     if( i_edx & 0x02000000 )
     {
         i_capabilities |= CPU_CAPABILITY_MMXEXT;
 
 #   ifdef CAN_COMPILE_SSE
-        /* We test if OS supports the SSE instructions */
-        psz_capability = "SSE";
-        i_illegal = 0;
-
-        if( setjmp( env ) == 0 )
-        {
-            /* Test a SSE instruction */
-            __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : );
-        }
-
-        if( i_illegal == 0 )
-        {
-            i_capabilities |= CPU_CAPABILITY_SSE;
-        }
+        check_capability( "SSE", CPU_CAPABILITY_SSE,
+                          "xorps %%xmm0,%%xmm0\n", );
 #   endif
     }
+# endif
 
+# if defined (__SSE2__)
+    i_capabilities |= CPU_CAPABILITY_SSE2;
+# elif defined (CAN_COMPILE_SSE2)
     if( i_edx & 0x04000000 )
-    {
-#   if defined(CAN_COMPILE_SSE)
-        /* We test if OS supports the SSE instructions */
-        psz_capability = "SSE2";
-        i_illegal = 0;
-
-        if( setjmp( env ) == 0 )
-        {
-            /* Test a SSE2 instruction */
-            __asm__ __volatile__ ( "movupd %%xmm0, %%xmm0\n" : : );
-        }
-
-        if( i_illegal == 0 )
-        {
-            i_capabilities |= CPU_CAPABILITY_SSE2;
-        }
-#   endif
-    }
+        check_capability( "SSE2", CPU_CAPABILITY_SSE2,
+                          "movupd %%xmm0, %%xmm0\n", );
+# endif
+
+# if defined (__SSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSE3;
+# elif defined (CAN_COMPILE_SSE3)
+    if( i_ecx & 0x00000001 )
+        check_capability( "SSE3", CPU_CAPABILITY_SSE3,
+                          "movsldup %%xmm1, %%xmm0\n", );
+# endif
+
+# if defined (__SSSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSSE3;
+# elif defined (CAN_COMPILE_SSSE3)
+    if( i_ecx & 0x00000200 )
+        check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
+                          "pabsw %%xmm1, %%xmm0\n", );
+# endif
+
+# if defined (__SSE4_1__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_1;
+# elif defined (CAN_COMPILE_SSE4_1)
+    if( i_ecx & 0x00080000 )
+        check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
+                          "pmaxsb %%xmm1, %%xmm0\n", );
+# endif
+
+# if defined (__SSE4_2__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_2;
+# elif defined (CAN_COMPILE_SSE4_2)
+    if( i_ecx & 0x00100000 )
+        check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
+                          "pcmpgtq %%xmm1, %%xmm0\n", );
+# endif
 
     /* test for additional capabilities */
     cpuid( 0x80000000 );
 
     if( i_eax < 0x80000001 )
-    {
-#   if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-     && defined( HAVE_SIGNAL_H )
-        signal( SIGILL, pf_sigill );
-#   endif
-        return i_capabilities;
-    }
+        goto out;
 
     /* list these additional capabilities */
     cpuid( 0x80000001 );
 
-#   ifdef CAN_COMPILE_3DNOW
+# if defined (__3dNOW__)
+    i_capabilities |= CPU_CAPABILITY_3DNOW;
+# elif defined (CAN_COMPILE_3DNOW)
     if( i_edx & 0x80000000 )
-    {
-        psz_capability = "3D Now!";
-        i_illegal = 0;
-
-        if( setjmp( env ) == 0 )
-        {
-            /* Test a 3D Now! instruction */
-            __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : );
-        }
-
-        if( i_illegal == 0 )
-        {
-            i_capabilities |= CPU_CAPABILITY_3DNOW;
-        }
-    }
-#   endif
+        check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
+                          "pfadd %%mm0,%%mm0\n" "femms\n", );
+# endif
 
     if( b_amd && ( i_edx & 0x00400000 ) )
     {
         i_capabilities |= CPU_CAPABILITY_MMXEXT;
     }
+out:
 
-#   if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) \
-     && defined( HAVE_SIGNAL_H )
-    signal( SIGILL, pf_sigill );
-#   endif
-    return i_capabilities;
+#elif defined (__arm__)
 
-#elif defined( __powerpc__ )
+# if defined (__ARM_NEON__)
+    i_capabilities |= CPU_CAPABILITY_NEON;
+# elif defined (CAN_COMPILE_NEON)
+#  define NEED_RUNTIME_CPU_CHECK 1
+# endif
 
-#   ifdef CAN_COMPILE_ALTIVEC && defined( HAVE_SIGNAL_H )
-    void (*pf_sigill) (int) = signal( SIGILL, SigHandler );
+# ifdef NEED_RUNTIME_CPU_CHECK
+#  if defined (__linux__)
+    FILE *info = fopen ("/proc/cpuinfo", "rt");
+    if (info != NULL)
+    {
+        char *line = NULL;
+        size_t linelen = 0;
 
-    i_capabilities |= CPU_CAPABILITY_FPU;
+        while (getline (&line, &linelen, info) != -1)
+        {
+            const char *cap;
 
-    i_illegal = 0;
+            if (strncmp (line, "Features\t:", 10))
+                continue;
 
-    if( setjmp( env ) == 0 )
-    {
-        asm volatile ("mtspr 256, %0\n\t"
-                      "vand %%v0, %%v0, %%v0"
-                      :
-                      : "r" (-1));
+            /* TODO: detect other CPU features when we use them */
+#   if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__)
+                cap = strstr (line + 10, " neon");
+            if (cap != NULL && (cap[5] == '\0' || cap[5] == ' '))
+                i_capabilities |= CPU_CAPABILITY_NEON;
+#   endif
+            break;
+        }
+        fclose (info);
+        free (line);
     }
+#  else
+#   warning Run-time CPU detection missing: optimizations disabled!
+#  endif
+# endif
 
-    if( i_illegal == 0 )
-    {
-        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
-    }
+#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
+    || defined( __ppc64__ )
 
-    signal( SIGILL, pf_sigill );
+#   if defined(__APPLE__) || defined(__OpenBSD__)
+#   if defined(__OpenBSD__)
+    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
+#   else
+    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
 #   endif
+    int i_has_altivec = 0;
+    size_t i_length = sizeof( i_has_altivec );
+    int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
 
-    return i_capabilities;
+    if( i_error == 0 && i_has_altivec != 0 )
+        i_capabilities |= CPU_CAPABILITY_ALTIVEC;
 
-#elif defined( __sparc__ )
+#   elif defined( CAN_COMPILE_ALTIVEC )
+    check_capability( "Altivec", CPU_CAPABILITY_ALTIVEC,
+        "mtspr 256, %0\n\t"
+        "vand %%v0, %%v0, %%v0",
+                      "r" (-1));
 
-    i_capabilities |= CPU_CAPABILITY_FPU;
-    return i_capabilities;
+#   endif
 
-#elif defined( _MSC_VER ) && !defined( UNDER_CE )
-    i_capabilities |= CPU_CAPABILITY_FPU;
-    return i_capabilities;
+#endif
 
-#else
-    /* default behaviour */
-    return i_capabilities;
+    cpu_flags = i_capabilities;
+}
 
+/**
+ * Retrieves pre-computed CPU capability flags
+ *
+ * Except on Windows and OS/2, every call goes through pthread_once(), so
+ * vlc_CPU_init() is run on first use before the flags are read.
+ *
+ * @return the current value of cpu_flags, as stored by vlc_CPU_init()
+ */
+unsigned vlc_CPU (void)
+{
+/* On Windows and OS/2,
+ * initialized from DllMain() and _DLL_InitTerm() respectively, instead */
+#if !defined(WIN32) && !defined(__OS2__)
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once (&once, vlc_CPU_init);
 #endif
+    return cpu_flags;
 }
 
-/*****************************************************************************
- * SigHandler: system signal handler
- *****************************************************************************
- * This function is called when an illegal instruction signal is received by
- * the program. We use this function to test OS and CPU capabilities
- *****************************************************************************/
-#if defined( HAVE_SIGNAL_H )
-static void SigHandler( int i_signal )
+void vlc_CPU_dump (vlc_object_t *obj) /* logs the names of the capabilities reported by vlc_CPU() through msg_Dbg() on obj */
 {
-    /* Acknowledge the signal received */
-    i_illegal = 1;
+    const unsigned flags = vlc_CPU();
+    char buf[200], *p = buf; /* p tracks the end of the text written so far; NOTE(review): the sprintf() calls below are unbounded and rely on the names fitting in 200 bytes */
+/* PRINT_CAPABILITY appends "<string> " to buf when the capability bit is set in flags */
+#define PRINT_CAPABILITY( capability, string ) \
+    if (flags & (capability)) \
+        p += sprintf (p, "%s ", (string) )
+
+#if defined (__i386__) || defined (__x86_64__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_MMX, "MMX");
+    PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
+    PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
+    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A,  "SSE4A");
+
+#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC, "AltiVec");
+
+#elif defined (__arm__)
+    PRINT_CAPABILITY(CPU_CAPABILITY_NEON, "NEONv1");
 
-#ifdef HAVE_SIGRELSE
-    sigrelse( i_signal );
 #endif
 
-#if defined( __i386__ )
-    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
-                     "operating system.\n", psz_capability );
-    fprintf( stderr, "         some optimizations will be disabled unless "
-                     "you upgrade your OS\n" );
-#   if defined( SYS_LINUX )
-    fprintf( stderr, "         (for instance Linux kernel 2.4.x or later)\n" );
-#   endif
+#if HAVE_FPU
+    p += sprintf (p, "FPU ");
 #endif
 
-    longjmp( env, 1 );
+    if (p > buf) /* skip the message when nothing was appended; buf has no initializer, so it is only read after a write */
+        msg_Dbg (obj, "CPU has capabilities %s", buf);
 }
-#endif
 
+
+static vlc_memcpy_t pf_vlc_memcpy = memcpy; /* copy routine that vlc_memcpy() forwards to; starts as the C library memcpy() */
+
+void vlc_fastmem_register (vlc_memcpy_t cpy) /* replaces the routine used by vlc_memcpy() */
+{
+    assert (cpy != NULL); /* a NULL routine is a caller bug; only checked when assertions are enabled */
+    pf_vlc_memcpy = cpy;
+}
+
+/**
+ * vlc_memcpy: fast CPU-dependent memcpy
+ *
+ * Forwards the call to the routine last set with vlc_fastmem_register(),
+ * or to memcpy() if none has been registered.
+ *
+ * @param tgt destination buffer, passed through unchanged
+ * @param src source buffer, passed through unchanged
+ * @param n byte count, passed through unchanged
+ * @return whatever the underlying copy routine returns
+ */
+void *vlc_memcpy (void *tgt, const void *src, size_t n)
+{
+    return pf_vlc_memcpy (tgt, src, n);
+}