git.sesse.net Git - vlc/blobdiff - src/misc/cpu.c
Added SSSE3/SSE4.1/SSE4.2 support to libvlc.
[vlc] / src / misc / cpu.c
index f223bf859c0b133811a48ee61f5062c8f6e19604..5b46fa9f789d17a77c5b6fc89fba3db2560315c0 100644 (file)
@@ -31,6 +31,7 @@
 #endif
 
 #include <vlc_common.h>
+#include <vlc_cpu.h>
 
 #include <sys/types.h>
 #ifndef WIN32
@@ -44,8 +45,8 @@
 #include <sys/sysctl.h>
 #endif
 
-#if defined( __i386__ ) || defined( __x86_64__ ) \
- || defined( __ppc__ ) || defined( __ppc64__ )
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
+ || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
 static bool check_OS_capability( const char *psz_capability, pid_t pid )
 {
 #ifndef WIN32
@@ -81,7 +82,7 @@ static bool check_OS_capability( const char *psz_capability, pid_t pid )
  *****************************************************************************/
 uint32_t CPUCapabilities( void )
 {
-    uint32_t i_capabilities = CPU_CAPABILITY_NONE;
+    uint32_t i_capabilities = 0;
 
 #if defined( __i386__ ) || defined( __x86_64__ )
      unsigned int i_eax, i_ebx, i_ecx, i_edx;
@@ -111,10 +112,22 @@ uint32_t CPUCapabilities( void )
                          : "a"  ( reg )        \
                          : "cc" );
 #   endif
-
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
-#   if defined( __i386__ )
+     /* Check if the OS really supports the requested instructions */
+#   define check_capability(name, flag, code)  \
+     do {                                      \
+        pid_t pid = fork();                    \
+        if( pid == 0 )                         \
+        {                                      \
+            __asm__ __volatile__ ( code : : ); \
+            exit(0);                           \
+        }                                      \
+        if( check_OS_capability((name), pid )) \
+            i_capabilities |= (flag);          \
+     } while(0)
+
+# if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
+  && !defined (__i686__) && !defined (__pentium4__) \
+  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
     /* check if cpuid instruction is supported */
     asm volatile ( "push %%ebx\n\t"
                    "pushf\n\t"
@@ -134,20 +147,17 @@ uint32_t CPUCapabilities( void )
 
     if( i_eax == i_ebx )
         goto out;
-#   else
-    /* x86_64 supports cpuid instruction, so we dont need to check it */
-#   endif
-
-    i_capabilities |= CPU_CAPABILITY_486;
+# endif
 
     /* the CPU supports the CPUID instruction - get its level */
     cpuid( 0x00000000 );
 
+# if defined (__i386__) && !defined (__i586__) \
+  && !defined (__i686__) && !defined (__pentium4__) \
+  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
     if( !i_eax )
         goto out;
-
-    /* FIXME: this isn't correct, since some 486s have cpuid */
-    i_capabilities |= CPU_CAPABILITY_586;
+#endif
 
     /* borrowed from mpeg2dec */
     b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
@@ -155,45 +165,65 @@ uint32_t CPUCapabilities( void )
 
     /* test for the MMX flag */
     cpuid( 0x00000001 );
-
+# if !defined (__MMX__)
     if( ! (i_edx & 0x00800000) )
         goto out;
-
+# endif
     i_capabilities |= CPU_CAPABILITY_MMX;
 
+# if defined (__SSE__)
+    i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
+# else
     if( i_edx & 0x02000000 )
     {
         i_capabilities |= CPU_CAPABILITY_MMXEXT;
 
 #   ifdef CAN_COMPILE_SSE
-        /* We test if OS supports the SSE instructions */
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a SSE instruction */
-            __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "SSE", pid ) )
-            i_capabilities |= CPU_CAPABILITY_SSE;
+        check_capability( "SSE", CPU_CAPABILITY_SSE,
+                          "xorps %%xmm0,%%xmm0\n" );
 #   endif
     }
+# endif
 
+# if defined (__SSE2__)
+    i_capabilities |= CPU_CAPABILITY_SSE2;
+# elif defined (CAN_COMPILE_SSE2)
     if( i_edx & 0x04000000 )
-    {
-#   if defined(CAN_COMPILE_SSE)
-        /* We test if OS supports the SSE2 instructions */
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a SSE2 instruction */
-            __asm__ __volatile__ ( "movupd %%xmm0, %%xmm0\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "SSE2", pid ) )
-            i_capabilities |= CPU_CAPABILITY_SSE2;
-#   endif
-    }
+        check_capability( "SSE2", CPU_CAPABILITY_SSE2,
+                          "movupd %%xmm0, %%xmm0\n" );
+# endif
+
+# if defined (__SSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSE3;
+# elif defined (CAN_COMPILE_SSE3)
+    if( i_ecx & 0x00000001 )
+        check_capability( "SSE3", CPU_CAPABILITY_SSE3,
+                          "movsldup %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSSE3;
+# elif defined (CAN_COMPILE_SSSE3)
+    if( i_ecx & 0x00000200 )
+        check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
+                          "pabsw %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_1__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_1;
+# elif defined (CAN_COMPILE_SSE4_1)
+    if( i_ecx & 0x00080000 )
+        check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
+                          "pmaxsb %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_2__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_2;
+# elif defined (CAN_COMPILE_SSE4_2)
+    if( i_ecx & 0x00100000 )
+        check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
+                          "pcmpgtq %%xmm1, %%xmm0\n" );
+# endif
 
     /* test for additional capabilities */
     cpuid( 0x80000000 );
@@ -204,20 +234,13 @@ uint32_t CPUCapabilities( void )
     /* list these additional capabilities */
     cpuid( 0x80000001 );
 
-#   ifdef CAN_COMPILE_3DNOW
+# if defined (__3dNOW__)
+    i_capabilities |= CPU_CAPABILITY_3DNOW;
+# elif defined (CAN_COMPILE_3DNOW)
     if( i_edx & 0x80000000 )
-    {
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a 3D Now! instruction */
-            __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "3D Now!", pid ) )
-            i_capabilities |= CPU_CAPABILITY_3DNOW;
-    }
-#   endif
+        check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
+                          "pfadd %%mm0,%%mm0\n" "femms\n" );
+# endif
 
     if( b_amd && ( i_edx & 0x00400000 ) )
     {
@@ -226,13 +249,12 @@ uint32_t CPUCapabilities( void )
 out:
 
 #elif defined( __arm__ )
-#   if defined( __ARM_EABI__ ) && !defined( __SOFTFP__ )
-    i_capabilities |= CPU_CAPABILITY_FPU;
+#   if defined( __ARM_NEON__ )
+    i_capabilities |= CPU_CAPABILITY_NEON;
 #   endif
 
-#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __ppc64__ )
-
-    i_capabilities |= CPU_CAPABILITY_FPU;
+#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
+    || defined( __ppc64__ )
 
 #   if defined(__APPLE__)
     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
@@ -259,12 +281,6 @@ out:
 
 #   endif
 
-#elif defined( __sparc__ )
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
-#elif defined( _MSC_VER ) && !defined( UNDER_CE )
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
 #endif
     return i_capabilities;
 }