]> git.sesse.net Git - x264/commitdiff
x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL
authorAnton Mitrofanov <BugMaster@narod.ru>
Mon, 15 Dec 2014 15:49:23 +0000 (18:49 +0300)
committerAnton Mitrofanov <BugMaster@narod.ru>
Sat, 20 Dec 2014 16:29:44 +0000 (19:29 +0300)
common/cpu.h
common/osdep.c
common/x86/cpu-a.asm
configure
encoder/encoder.c
extras/intel_dispatcher.h [new file with mode: 0644]

index 6d08027a647a2e13a331be2845c46fe3dbe0db37..07e5c6cb433b7335cee46ddb3e46187bb64c2390 100644 (file)
@@ -45,7 +45,6 @@ void     x264_cpu_sfence( void );
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
  * gcc can't give variables any greater alignment than the stack frame has.
index e97aaeda11cd734f53bca0f9d9c4e08f8066ca14..91f3fdd74f9bf23519ad14c15a2679cd5add6474 100644 (file)
@@ -94,51 +94,6 @@ int x264_threading_init( void )
 }
 #endif
 
-#if HAVE_MMX
-#ifdef __INTEL_COMPILER
-/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
- * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30)
- * adapted to x264's cpu schema. */
-
-// Global variable indicating cpu
-int __intel_cpu_indicator = 0;
-// CPU dispatcher function
-void x264_intel_cpu_indicator_init( void )
-{
-    unsigned int cpu = x264_cpu_detect();
-    if( cpu&X264_CPU_AVX )
-        __intel_cpu_indicator = 0x20000;
-    else if( cpu&X264_CPU_SSE42 )
-        __intel_cpu_indicator = 0x8000;
-    else if( cpu&X264_CPU_SSE4 )
-        __intel_cpu_indicator = 0x2000;
-    else if( cpu&X264_CPU_SSSE3 )
-        __intel_cpu_indicator = 0x1000;
-    else if( cpu&X264_CPU_SSE3 )
-        __intel_cpu_indicator = 0x800;
-    else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) )
-        __intel_cpu_indicator = 0x200;
-    else if( cpu&X264_CPU_SSE )
-        __intel_cpu_indicator = 0x80;
-    else if( cpu&X264_CPU_MMX2 )
-        __intel_cpu_indicator = 8;
-    else
-        __intel_cpu_indicator = 1;
-}
-
-/* __intel_cpu_indicator_init appears to have a non-standard calling convention that
- * assumes certain registers aren't preserved, so we'll route it through a function
- * that backs up all the registers. */
-void __intel_cpu_indicator_init( void )
-{
-    x264_safe_intel_cpu_indicator_init();
-}
-#else
-void x264_intel_cpu_indicator_init( void )
-{}
-#endif
-#endif
-
 #ifdef _WIN32
 /* Functions for dealing with Unicode on Windows. */
 FILE *x264_fopen( const char *filename, const char *mode )
index bcf6c4340dac66455b3ab0a16bc8b072e66ce64b..4dfd77540df73d2a111dc96cc029fc272b809767 100644 (file)
@@ -145,53 +145,3 @@ cglobal cpu_emms
 cglobal cpu_sfence
     sfence
     ret
-
-cextern intel_cpu_indicator_init
-
-;-----------------------------------------------------------------------------
-; void safe_intel_cpu_indicator_init( void );
-;-----------------------------------------------------------------------------
-cglobal safe_intel_cpu_indicator_init
-    push r0
-    push r1
-    push r2
-    push r3
-    push r4
-    push r5
-    push r6
-%if ARCH_X86_64
-    push r7
-    push r8
-    push r9
-    push r10
-    push r11
-    push r12
-    push r13
-    push r14
-%endif
-    push rbp
-    mov  rbp, rsp
-%if WIN64
-    sub  rsp, 32 ; shadow space
-%endif
-    and  rsp, ~31
-    call intel_cpu_indicator_init
-    leave
-%if ARCH_X86_64
-    pop r14
-    pop r13
-    pop r12
-    pop r11
-    pop r10
-    pop r9
-    pop r8
-    pop r7
-%endif
-    pop r6
-    pop r5
-    pop r4
-    pop r3
-    pop r2
-    pop r1
-    pop r0
-    ret
index 02fe4be3fc4410641f1ba3cbc69893d871338b53..e2977bdf25abb5acf8592f8ed6a0907aab2fa548 100755 (executable)
--- a/configure
+++ b/configure
@@ -311,7 +311,7 @@ NL="
 
 # list of all preprocessor HAVE values we can define
 CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
-             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC"
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER"
 
 # parse options
 
@@ -1093,6 +1093,12 @@ if cc_check '' -Wmaybe-uninitialized ; then
     CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
 fi
 
+if [ $compiler = ICC -o $compiler = ICL ] ; then
+    if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
+        define HAVE_INTEL_DISPATCHER
+    fi
+fi
+
 if [ "$bit_depth" -gt "8" ]; then
     define HIGH_BIT_DEPTH
     ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1"
index c98a900ed978995ee9265bfe9bbb12ec658c3f40..54d2e5a896af12b2bc49f5e3509ec0e369e33560 100644 (file)
@@ -32,6 +32,9 @@
 #include "ratecontrol.h"
 #include "macroblock.h"
 #include "me.h"
+#if HAVE_INTEL_DISPATCHER
+#include "extras/intel_dispatcher.h"
+#endif
 
 //#define DEBUG_MB_TYPE
 
@@ -1390,6 +1393,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( param->param_free )
         param->param_free( param );
 
+#if HAVE_INTEL_DISPATCHER
+    x264_intel_dispatcher_override();
+#endif
+
     if( x264_threading_init() )
     {
         x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" );
diff --git a/extras/intel_dispatcher.h b/extras/intel_dispatcher.h
new file mode 100644 (file)
index 0000000..8837c62
--- /dev/null
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * intel_dispatcher.h: intel compiler cpu dispatcher override
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_INTEL_DISPATCHER_H
+#define X264_INTEL_DISPATCHER_H
+
+/* Feature flags using _FEATURE_* defines from immintrin.h */
+extern unsigned long long __intel_cpu_feature_indicator;
+extern unsigned long long __intel_cpu_feature_indicator_x;
+
+/* CPU vendor independent version of dispatcher */
+void __intel_cpu_features_init_x( void );
+
+static void x264_intel_dispatcher_override( void )
+{
+    if( __intel_cpu_feature_indicator & ~1ULL )
+        return;
+    __intel_cpu_feature_indicator = 0;
+    __intel_cpu_feature_indicator_x = 0;
+    __intel_cpu_features_init_x();
+    __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x;
+}
+
+#endif