]> git.sesse.net Git - vlc/blobdiff - src/misc/cpu.c
Added SSSE3/SSE4.1/SSE4.2 support to libvlc.
[vlc] / src / misc / cpu.c
index 2bbff245f05aa6dea38488d927fd3bdf3e82b0ba..5b46fa9f789d17a77c5b6fc89fba3db2560315c0 100644 (file)
@@ -31,6 +31,7 @@
 #endif
 
 #include <vlc_common.h>
+#include <vlc_cpu.h>
 
 #include <sys/types.h>
 #ifndef WIN32
@@ -81,7 +82,7 @@ static bool check_OS_capability( const char *psz_capability, pid_t pid )
  *****************************************************************************/
 uint32_t CPUCapabilities( void )
 {
-    uint32_t i_capabilities = CPU_CAPABILITY_NONE;
+    uint32_t i_capabilities = 0;
 
 #if defined( __i386__ ) || defined( __x86_64__ )
      unsigned int i_eax, i_ebx, i_ecx, i_edx;
@@ -111,8 +112,18 @@ uint32_t CPUCapabilities( void )
                          : "a"  ( reg )        \
                          : "cc" );
 #   endif
-
-    i_capabilities |= CPU_CAPABILITY_FPU;
+     /* Check if the OS really supports the requested instructions */
+#   define check_capability(name, flag, code)  \
+     do {                                      \
+        pid_t pid = fork();                    \
+        if( pid == 0 )                         \
+        {                                      \
+            __asm__ __volatile__ ( code : : ); \
+            exit(0);                           \
+        }                                      \
+        if( check_OS_capability((name), pid )) \
+            i_capabilities |= (flag);          \
+     } while(0)
 
 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
   && !defined (__i686__) && !defined (__pentium4__) \
@@ -168,36 +179,50 @@ uint32_t CPUCapabilities( void )
         i_capabilities |= CPU_CAPABILITY_MMXEXT;
 
 #   ifdef CAN_COMPILE_SSE
-        /* We test if OS supports the SSE instructions */
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a SSE instruction */
-            __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "SSE", pid ) )
-            i_capabilities |= CPU_CAPABILITY_SSE;
+        check_capability( "SSE", CPU_CAPABILITY_SSE,
+                          "xorps %%xmm0,%%xmm0\n" );
 #   endif
     }
 # endif
 
 # if defined (__SSE2__)
     i_capabilities |= CPU_CAPABILITY_SSE2;
-# elif defined (CAN_COMPILE_SSE)
+# elif defined (CAN_COMPILE_SSE2)
     if( i_edx & 0x04000000 )
-    {
-        /* We test if OS supports the SSE2 instructions */
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a SSE2 instruction */
-            __asm__ __volatile__ ( "movupd %%xmm0, %%xmm0\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "SSE2", pid ) )
-            i_capabilities |= CPU_CAPABILITY_SSE2;
-    }
+        check_capability( "SSE2", CPU_CAPABILITY_SSE2,
+                          "movupd %%xmm0, %%xmm0\n" );
+# endif
+
+# if defined (__SSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSE3;
+# elif defined (CAN_COMPILE_SSE3)
+    if( i_ecx & 0x00000001 )
+        check_capability( "SSE3", CPU_CAPABILITY_SSE3,
+                          "movsldup %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSSE3__)
+    i_capabilities |= CPU_CAPABILITY_SSSE3;
+# elif defined (CAN_COMPILE_SSSE3)
+    if( i_ecx & 0x00000200 )
+        check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
+                          "pabsw %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_1__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_1;
+# elif defined (CAN_COMPILE_SSE4_1)
+    if( i_ecx & 0x00080000 )
+        check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
+                          "pmaxsb %%xmm1, %%xmm0\n" );
+# endif
+
+# if defined (__SSE4_2__)
+    i_capabilities |= CPU_CAPABILITY_SSE4_2;
+# elif defined (CAN_COMPILE_SSE4_2)
+    if( i_ecx & 0x00100000 )
+        check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
+                          "pcmpgtq %%xmm1, %%xmm0\n" );
 # endif
 
     /* test for additional capabilities */
@@ -213,17 +238,8 @@ uint32_t CPUCapabilities( void )
     i_capabilities |= CPU_CAPABILITY_3DNOW;
 # elif defined (CAN_COMPILE_3DNOW)
     if( i_edx & 0x80000000 )
-    {
-        pid_t pid = fork();
-        if( pid == 0 )
-        {
-            /* Test a 3D Now! instruction */
-            __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : );
-            exit(0);
-        }
-        if( check_OS_capability( "3D Now!", pid ) )
-            i_capabilities |= CPU_CAPABILITY_3DNOW;
-    }
+        check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
+                          "pfadd %%mm0,%%mm0\n" "femms\n" );
 # endif
 
     if( b_amd && ( i_edx & 0x00400000 ) )
@@ -233,9 +249,6 @@ uint32_t CPUCapabilities( void )
 out:
 
 #elif defined( __arm__ )
-#   if defined( __ARM_EABI__ ) && !defined( __SOFTFP__ )
-//    i_capabilities |= CPU_CAPABILITY_FPU;
-#   endif
 #   if defined( __ARM_NEON__ )
     i_capabilities |= CPU_CAPABILITY_NEON;
 #   endif
@@ -243,8 +256,6 @@ out:
 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
     || defined( __ppc64__ )
 
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
 #   if defined(__APPLE__)
     int selectors[2] = { CTL_HW, HW_VECTORUNIT };
     int i_has_altivec = 0;
@@ -270,12 +281,6 @@ out:
 
 #   endif
 
-#elif defined( __sparc__ )
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
-#elif defined( _MSC_VER ) && !defined( UNDER_CE )
-    i_capabilities |= CPU_CAPABILITY_FPU;
-
 #endif
     return i_capabilities;
 }