git.sesse.net Git - vlc/commitdiff
Check for SSE2 at build-time if possible
author Rémi Denis-Courmont <remi@remlab.net>
Sat, 4 Aug 2012 13:26:40 +0000 (16:26 +0300)
committer Rémi Denis-Courmont <remi@remlab.net>
Sat, 4 Aug 2012 13:37:42 +0000 (16:37 +0300)
16 files changed:
include/vlc_cpu.h
modules/codec/avcodec/avcodec.c
modules/codec/avcodec/copy.c
modules/codec/avcodec/encoder.c
modules/codec/x264.c
modules/stream_out/switcher.c
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_yuy2.c
modules/video_chroma/i422_yuy2.c
modules/video_filter/deinterlace/algo_yadif.c
modules/video_filter/deinterlace/deinterlace.c
modules/video_filter/gradfun.c
modules/video_filter/grain.c
modules/video_filter/sepia.c
src/misc/cpu.c
src/posix/linux_cpu.c

index 437ccbba8107ca38bfa977aa29c97407757924b2..f233b6608b3154298cb1a9b3da32017ae5cac10d 100644 (file)
@@ -34,7 +34,7 @@ VLC_API unsigned vlc_CPU(void);
 #  define CPU_CAPABILITY_3DNOW   (1<<4)
 #  define VLC_CPU_MMXEXT 32
 #  define VLC_CPU_SSE    64
-#  define CPU_CAPABILITY_SSE2    (1<<7)
+#  define VLC_CPU_SSE2   128
 #  define CPU_CAPABILITY_SSE3    (1<<8)
 #  define CPU_CAPABILITY_SSSE3   (1<<9)
 #  define CPU_CAPABILITY_SSE4_1  (1<<10)
@@ -67,6 +67,12 @@ VLC_API unsigned vlc_CPU(void);
 #  endif
 # endif
 
+# ifdef __SSE2__
+#  define vlc_CPU_SSE2() (1)
+# else
+#  define vlc_CPU_SSE2() ((vlc_CPU() & VLC_CPU_SSE2) != 0)
+# endif
+
 # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
 #  define HAVE_FPU 1
 #  define VLC_CPU_ALTIVEC 2
index a7c66780ddd2bef157fbcdfe6172fc14b5cb339a..3f606b604eea784326d6f7ed8fae58ed4fca11ab 100644 (file)
@@ -340,7 +340,7 @@ static int OpenDecoder( vlc_object_t *p_this )
         p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
     if( !vlc_CPU_SSE() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE;
-    if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+    if( !vlc_CPU_SSE2() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
 # ifdef AV_CPU_FLAG_SSE3
     if( !(i_cpu & CPU_CAPABILITY_SSE3) )
index 3c2706926ea64ebaace35d6b03afd76e70623f0b..1573b93a2cafe278e21d4bce0fb6eb1f4c6f5ff9 100644 (file)
 
 /* Execute the instruction op only if SSE2 is supported. */
 #ifdef CAN_COMPILE_SSE2
-#   define ASM_SSE2(cpu, op) do {          \
-        if (cpu & CPU_CAPABILITY_SSE2)  \
-            asm volatile (op);    \
+# ifdef __SSE2__
+#  define ASM_SSE2(cpu, op) asm volatile (op)
+# else
+#  define ASM_SSE2(cpu, op) do { \
+    if (cpu & VLC_CPU_SSE2) \
+        asm volatile (op); \
     } while (0)
+#  undef vlc_CPU_SSE2
+#  define vlc_CPU_SSE2() ((cpu & VLC_CPU_SSE2) != 0)
+# endif
 #else
-#   define ASM_SSE2(cpu, op)
+# define ASM_SSE2(cpu, op)
 #endif
 
 /* Optimized copy from "Uncacheable Speculative Write Combining" memory
@@ -88,7 +94,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
         } else
 #endif
 #ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
+        if (vlc_CPU_SSE2()) {
             if (!unaligned) {
                 for (; x+63 < width; x += 64)
                     COPY64(&dst[x], &src[x], "movdqa", "movdqa");
@@ -121,7 +127,7 @@ static void Copy2d(uint8_t *dst, size_t dst_pitch,
         bool unaligned = ((intptr_t)dst & 0x0f) != 0;
 
 #ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
+        if (vlc_CPU_SSE2()) {
             if (!unaligned) {
                 for (; x+63 < width; x += 64)
                     COPY64(&dst[x], &src[x], "movdqa", "movntdq");
@@ -189,7 +195,7 @@ static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
         } else
 #endif
 #ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
+        if (vlc_CPU_SSE2()) {
             for (x = 0; x < (width & ~31); x += 32) {
                 asm volatile (
                     "movdqu (%[mask]), %%xmm7\n"
index a851744357087ab5ec506a5bcb6b21523c9fce45..537a44548bbec69d0086809bf5bd794b6fbf5e5d 100644 (file)
@@ -334,7 +334,7 @@ int OpenEncoder( vlc_object_t *p_this )
         p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
     if( !vlc_CPU_SSE() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE;
-    if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+    if( !vlc_CPU_SSE2() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
 # ifdef AV_CPU_FLAG_SSE3
     if( !(i_cpu & CPU_CAPABILITY_SSE3) )
index 64bb8af66afa5b2f152e31a0c5f657c1721c3f04..de3ac8d26f00d9f8e04a76de02ef58090f552dcd 100644 (file)
@@ -1266,7 +1266,7 @@ static int  Open ( vlc_object_t *p_this )
         p_sys->param.cpu &= ~X264_CPU_MMXEXT;
     if( !vlc_CPU_SSE() )
         p_sys->param.cpu &= ~X264_CPU_SSE;
-    if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) )
+    if( !vlc_CPU_SSE2() )
         p_sys->param.cpu &= ~X264_CPU_SSE2;
 #endif
 
index 5e61c183d9adf94e2d58cf8330559ef1726fda31..8c28541e691987503ac0fba33cfccf58a3bb00c0 100644 (file)
@@ -389,7 +389,7 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
         if( !vlc_CPU_SSE() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
-        if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+        if( !vlc_CPU_SSE2() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
 # ifdef AV_CPU_FLAG_SSE3
         if( !(i_cpu & CPU_CAPABILITY_SSE3) )
@@ -810,7 +810,7 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
         if( !vlc_CPU_SSE() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
-        if( !(i_cpu & CPU_CAPABILITY_SSE2) )
+        if( !vlc_CPU_SSE2() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
 # ifdef AV_CPU_FLAG_SSE3
         if( !(i_cpu & CPU_CAPABILITY_SSE3) )
index 4ff2cc74e0be0c7d779b65c1203b1d8630a93212..9cf849d14e0c688a36efffa2c49463ee0ed4e243 100644 (file)
@@ -94,7 +94,7 @@ vlc_module_begin ()
     set_description( N_( "SSE2 I420,IYUV,YV12 to "
                         "RV15,RV16,RV24,RV32 conversions") )
     set_capability( "video filter2", 120 )
-# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
 #endif
     set_callbacks( Activate, Deactivate )
 vlc_module_end ()
index 79ad697af79384b7681d96567c36a3987222a5f8..59969fb38bdbcfbfe9a60aa3df0f29556925d573 100644 (file)
@@ -96,12 +96,12 @@ vlc_module_begin ()
 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
     set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
     set_capability( "video filter2", 250 )
-# define vlc_CPU_capable() (vlc_CPU() & CPU_CAPABILITY_SSE2)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
     set_description(
             _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
     set_capability( "video filter2", 250 )
-# define vlc_CPU_capable() (vlc_CPU_ALTIVEC())
+# define vlc_CPU_capable() vlc_CPU_ALTIVEC()
 #endif
     set_callbacks( Activate, NULL )
 vlc_module_end ()
index 53bcb544217ff9d678f451704ab941c7f5ca7041..4cbf1e75e2ab359c298b036947624e09058bc11c 100644 (file)
@@ -81,7 +81,7 @@ vlc_module_begin ()
 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
     set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
     set_capability( "video filter2", 120 )
-# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
+# define vlc_CPU_capable() vlc_CPU_SSE2()
 # define VLC_TARGET VLC_SSE
 #endif
     set_callbacks( Activate, NULL )
index b055a810ff0f2951d105638bd9e3ab9ac457df29..8345d3a87b2b2956ce0e7d0cd945d9a551028106 100644 (file)
@@ -108,19 +108,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
         void (*filter)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next,
                        int w, int prefs, int mrefs, int parity, int mode);
 
-        filter = yadif_filter_line_c;
-#if defined(HAVE_YADIF_MMX)
-        if( vlc_CPU_MMX() )
-            filter = yadif_filter_line_mmx;
+#if defined(HAVE_YADIF_SSSE3)
+        if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
+            filter = yadif_filter_line_ssse3;
+        else
 #endif
 #if defined(HAVE_YADIF_SSE2)
-        if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
+        if( vlc_CPU_SSE2() )
             filter = yadif_filter_line_sse2;
+        else
 #endif
-#if defined(HAVE_YADIF_SSSE3)
-        if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
-            filter = yadif_filter_line_ssse3;
+#if defined(HAVE_YADIF_MMX)
+        if( vlc_CPU_MMX() )
+            filter = yadif_filter_line_mmx;
+        else
 #endif
+            filter = yadif_filter_line_c;
 
         for( int n = 0; n < p_dst->i_planes; n++ )
         {
index 375411d8d623d6cc6502149e0de8d495df11ee5a..ab02cbb55a990aac13485a82f3c8d017b4e0a214 100644 (file)
@@ -632,8 +632,8 @@ int Open( vlc_object_t *p_this )
         p_sys->pf_merge = MergeAltivec;
     else
 #endif
-#if defined(CAN_COMPILE_SSE)
-    if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
+#if defined(CAN_COMPILE_SSE2)
+    if( vlc_CPU_SSE2() )
     {
         p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
         p_sys->pf_end_merge = EndMMX;
index d3d153c84f974cec57660ee4dbfbf368db821e99..701a119e5f003f2e66ae338d31ffb443585e59a6 100644 (file)
@@ -135,7 +135,7 @@ static int Open(vlc_object_t *object)
     cfg->buf         = NULL;
 
 #if HAVE_SSE2 && HAVE_6REGS
-    if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+    if (vlc_CPU_SSE2())
         cfg->blur_line = blur_line_sse2;
     else
 #endif
index c447876dcfac1a9d304a7fa5bb385a5dd85f053b..8fca662b3aa3b85c7c04d2563fe66859cf787b15 100644 (file)
@@ -409,7 +409,7 @@ static int Open(vlc_object_t *object)
     sys->blend = BlockBlendC;
     sys->emms  = NULL;
 #if defined(CAN_COMPILE_SSE2) && 1
-    if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
+    if (vlc_CPU_SSE2()) {
         sys->blend = BlockBlendSse2;
         sys->emms  = Emms;
     }
index 052e70e59abf542f62cd0e4592992457eaa24c08..3e59e80cb6f42e0354cef12c8abb8c7bb844ec47 100644 (file)
@@ -245,7 +245,7 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
     const uint8_t filling_const_8v = 128 + i_intensity / 14;
 
 #if defined(CAN_COMPILE_SSE2)
-    if (vlc_CPU() & CPU_CAPABILITY_SSE2)
+    if (vlc_CPU_SSE2())
     {
         /* prepared value for faster broadcasting in xmm register */
         int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
index 3d997bbab08818037d98db04e552b5e8a5078654..fb193fc3fce8747b7bcf8e7959f549e72bad884b 100644 (file)
@@ -232,11 +232,9 @@ void vlc_CPU_init (void)
 #   endif
     }
 
-# if defined (__SSE2__)
-    i_capabilities |= CPU_CAPABILITY_SSE2;
-# elif defined (CAN_COMPILE_SSE2)
+# if defined (CAN_COMPILE_SSE2)
     if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test))
-        i_capabilities |= CPU_CAPABILITY_SSE2;
+        i_capabilities |= VLC_CPU_SSE2;
 # endif
 
 # if defined (__SSE3__)
@@ -348,7 +346,7 @@ void vlc_CPU_dump (vlc_object_t *obj)
     if (vlc_CPU_MMX()) p += sprintf (p, "MMX ");
     if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT ");
     if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");;
-    PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
+    if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
     PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
     PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
     PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
index f93472e9bbd884354a879044b7732f11b5084a88..57b4326917e71e97b8f8cff871e3c43985044155 100644 (file)
@@ -73,10 +73,8 @@ static void vlc_CPU_init (void)
                 core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT;
             if (!strcmp (cap, "mmxext"))
                 core_caps |= VLC_CPU_MMXEXT;
-# ifndef __SSE2__
             if (!strcmp (cap, "sse2"))
-                core_caps |= CPU_CAPABILITY_SSE2;
-# endif
+                core_caps |= VLC_CPU_SSE2;
 # ifndef __SSE3__
             if (!strcmp (cap, "pni"))
                 core_caps |= CPU_CAPABILITY_SSE3;
@@ -117,9 +115,6 @@ static void vlc_CPU_init (void)
 
     /* Always enable capabilities that were forced during compilation */
 #if defined (__i386__) || defined (__x86_64__)
-# ifdef __SSE2__
-    all_caps |= CPU_CAPABILITY_SSE2;
-# endif
 # ifdef __SSE3__
     all_caps |= CPU_CAPABILITY_SSE3;
 # endif