X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fmisc%2Fcpu.c;h=81bf1432362aa9df2471207f2257b5a8602660b0;hb=8f327be75dbb072a9d8aeb5fbf6173b85fab6ad0;hp=39e9331390933cd958743f2c11fd4218b7eb675a;hpb=2fa6c9ce280b037c058eed166bfc51e7c70c5d14;p=vlc diff --git a/src/misc/cpu.c b/src/misc/cpu.c index 39e9331390..81bf143236 100644 --- a/src/misc/cpu.c +++ b/src/misc/cpu.c @@ -1,8 +1,8 @@ /***************************************************************************** * cpu.c: CPU detection code ***************************************************************************** - * Copyright (C) 1998-2002 VideoLAN - * $Id: cpu.c,v 1.5 2002/08/19 11:13:45 sam Exp $ + * Copyright (C) 1998-2004 the VideoLAN team + * $Id$ * * Authors: Samuel Hocevar * Christophe Massiot @@ -20,161 +20,186 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. *****************************************************************************/ /***************************************************************************** * Preamble *****************************************************************************/ -#include /* SIGHUP, SIGINT, SIGKILL */ -#include /* longjmp, setjmp */ - -#include - -#ifdef SYS_DARWIN -# include /* AltiVec detection */ -# include /* some day the header files||compiler * - will define it for us */ -# include +#ifdef HAVE_CONFIG_H +# include "config.h" #endif -#include "vlc_cpu.h" +#include +#include -/***************************************************************************** - * Local prototypes - *****************************************************************************/ -static void SigHandler ( int ); -static u32 Capabilities ( vlc_object_t * ); +#include +#ifndef WIN32 +#include +#include +#include +#else +#include +#endif +#include -/***************************************************************************** - * Global variables - they're needed for signal handling - *****************************************************************************/ -static jmp_buf env; -static int i_illegal; -#if defined( __i386__ ) -static char *psz_capability; +#ifdef __APPLE__ +#include #endif -/***************************************************************************** - * CPUCapabilities: get the CPU capabilities - ***************************************************************************** - * This function is a wrapper around Capabilities(). - *****************************************************************************/ -u32 __CPUCapabilities( vlc_object_t *p_this ) -{ - u32 i_capabilities; +#include "libvlc.h" - vlc_mutex_lock( p_this->p_vlc->p_global_lock ); - i_capabilities = Capabilities( p_this ); - vlc_mutex_unlock( p_this->p_vlc->p_global_lock ); - - return i_capabilities; -} +static uint32_t cpu_flags; -/***************************************************************************** - * Capabilities: list the processors MMX support and other capabilities - ***************************************************************************** - * This function is called to list extensions the CPU may have. - *****************************************************************************/ -static u32 Capabilities( vlc_object_t *p_this ) +#if defined (__i386__) || defined (__x86_64__) || defined (__powerpc__) \ + || defined (__ppc__) || defined (__ppc64__) || defined (__powerpc64__) +# if !defined (WIN32) && !defined (__OS2__) +static bool vlc_CPU_check (const char *name, void (*func) (void)) { - volatile u32 i_capabilities = CPU_CAPABILITY_NONE; + pid_t pid = fork(); -#if defined( SYS_DARWIN ) - struct host_basic_info hi; - kern_return_t ret; - host_name_port_t host; + switch (pid) + { + case 0: + signal (SIGILL, SIG_DFL); + func (); + //__asm__ __volatile__ ( code : : input ); + _exit (0); + case -1: + return false; + } + //i_capabilities |= (flag); - int i_size; - char *psz_name, *psz_subname; + int status; + while( waitpid( pid, &status, 0 ) == -1 ); - i_capabilities |= CPU_CAPABILITY_FPU; + if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 ) + return true; - /* Should 'never' fail? */ - host = mach_host_self(); + fprintf (stderr, "Warning: your CPU has %s instructions, but not your " + "operating system.\n", name); + fprintf( stderr, " some optimizations will be disabled unless " + "you upgrade your OS\n" ); + return false; +} - i_size = sizeof( hi ) / sizeof( int ); - ret = host_info( host, HOST_BASIC_INFO, ( host_info_t )&hi, &i_size ); +#if defined (CAN_COMPILE_SSE) && !defined (__SSE__) +VLC_SSE static void SSE_test (void) +{ + asm volatile ("xorps %%xmm0,%%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_SSE2) && !defined (__SSE2__) +VLC_SSE static void SSE2_test (void) +{ + asm volatile ("movupd %%xmm0, %%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_SSE3) && !defined (__SSE3__) +VLC_SSE static void SSE3_test (void) +{ + asm volatile ("movsldup %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_SSSE3) && !defined (__SSSE3__) +VLC_SSE static void SSSE3_test (void) +{ + asm volatile ("pabsw %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_SSE4_1) && !defined (__SSE4_1__) +VLC_SSE static void SSE4_1_test (void) +{ + asm volatile ("pmaxsb %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_SSE4_2) && !defined (__SSE4_2__) +VLC_SSE static void SSE4_2_test (void) +{ + asm volatile ("pcmpgtq %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1"); +} +#endif +#if defined (CAN_COMPILE_3DNOW) && !defined (__3dNOW__) +VLC_MMX static void ThreeD_Now_test (void) +{ + asm volatile ("pfadd %%mm0,%%mm0\n" "femms\n" : : : "mm0"); +} +#endif - if( ret != KERN_SUCCESS ) - { - fprintf( stderr, "error: couldn't get CPU information\n" ); - return i_capabilities; - } +#if defined (CAN_COMPILE_ALTIVEC) +static void Altivec_text (void) +{ + asm volatile ("mtspr 256, %0\n" "vand %%v0, %%v0, %%v0\n" : : "r" (-1)); +} +#endif - slot_name( hi.cpu_type, hi.cpu_subtype, &psz_name, &psz_subname ); - /* FIXME: need better way to detect newer proccessors. - * could do strncmp(a,b,5), but that's real ugly */ - if( !strcmp(psz_name, "ppc7400") || !strcmp(psz_name, "ppc7450") ) - { - i_capabilities |= CPU_CAPABILITY_ALTIVEC; - } +#else /* WIN32 || __OS2__ */ +# define vlc_CPU_check(name, func) (1) +#endif +#endif - return i_capabilities; +/** + * Determines the CPU capabilities and stores them in cpu_flags. + * The result can be retrieved with vlc_CPU(). + */ +void vlc_CPU_init (void) +{ + uint32_t i_capabilities = 0; -#elif defined( __i386__ ) - volatile unsigned int i_eax, i_ebx, i_ecx, i_edx; - volatile vlc_bool_t b_amd; +#if defined( __i386__ ) || defined( __x86_64__ ) + unsigned int i_eax, i_ebx, i_ecx, i_edx; + bool b_amd; /* Needed for x86 CPU capabilities detection */ -# define cpuid( a ) \ - asm volatile ( "pushl %%ebx\n\t" \ - "cpuid\n\t" \ - "movl %%ebx,%1\n\t" \ - "popl %%ebx\n\t" \ - : "=a" ( i_eax ), \ - "=r" ( i_ebx ), \ - "=c" ( i_ecx ), \ - "=d" ( i_edx ) \ - : "a" ( a ) \ - : "cc" ); - -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - sighandler_t pf_sigill = signal( SIGILL, SigHandler ); -# endif - - i_capabilities |= CPU_CAPABILITY_FPU; - - /* test for a 486 CPU */ - asm volatile ( "pushl %%ebx\n\t" - "pushfl\n\t" - "popl %%eax\n\t" +# if defined (__i386__) && defined (__PIC__) +# define cpuid(reg) \ + asm volatile ("xchgl %%ebx,%1\n\t" \ + "cpuid\n\t" \ + "xchgl %%ebx,%1\n\t" \ + : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \ + : "a" (reg) \ + : "cc"); +# else +# define cpuid(reg) \ + asm volatile ("cpuid\n\t" \ + : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \ + : "a" (reg) \ + : "cc"); +# endif + /* Check if the OS really supports the requested instructions */ +# if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \ + && !defined (__i686__) && !defined (__pentium4__) \ + && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__) + /* check if cpuid instruction is supported */ + asm volatile ( "push %%ebx\n\t" + "pushf\n\t" + "pop %%eax\n\t" "movl %%eax, %%ebx\n\t" "xorl $0x200000, %%eax\n\t" - "pushl %%eax\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %%eax\n\t" + "push %%eax\n\t" + "popf\n\t" + "pushf\n\t" + "pop %%eax\n\t" "movl %%ebx,%1\n\t" - "popl %%ebx\n\t" + "pop %%ebx\n\t" : "=a" ( i_eax ), "=r" ( i_ebx ) : : "cc" ); if( i_eax == i_ebx ) - { -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - signal( SIGILL, pf_sigill ); -# endif - return i_capabilities; - } - - i_capabilities |= CPU_CAPABILITY_486; + goto out; +# endif /* the CPU supports the CPUID instruction - get its level */ cpuid( 0x00000000 ); +# if defined (__i386__) && !defined (__i586__) \ + && !defined (__i686__) && !defined (__pentium4__) \ + && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__) if( !i_eax ) - { -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - signal( SIGILL, pf_sigill ); -# endif - return i_capabilities; - } - - /* FIXME: this isn't correct, since some 486s have cpuid */ - i_capabilities |= CPU_CAPABILITY_586; + goto out; +#endif /* borrowed from mpeg2dec */ b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 ) @@ -182,146 +207,213 @@ static u32 Capabilities( vlc_object_t *p_this ) /* test for the MMX flag */ cpuid( 0x00000001 ); - +# if !defined (__MMX__) if( ! (i_edx & 0x00800000) ) - { -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - signal( SIGILL, pf_sigill ); -# endif - return i_capabilities; - } - + goto out; +# endif i_capabilities |= CPU_CAPABILITY_MMX; +# if defined (__SSE__) + i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE; +# else if( i_edx & 0x02000000 ) { i_capabilities |= CPU_CAPABILITY_MMXEXT; # ifdef CAN_COMPILE_SSE - /* We test if OS supports the SSE instructions */ - psz_capability = "SSE"; - i_illegal = 0; - - if( setjmp( env ) == 0 ) - { - /* Test a SSE instruction */ - __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : ); - } - - if( i_illegal == 0 ) - { + if (vlc_CPU_check ("SSE", SSE_test)) i_capabilities |= CPU_CAPABILITY_SSE; - } # endif } +# endif + +# if defined (__SSE2__) + i_capabilities |= CPU_CAPABILITY_SSE2; +# elif defined (CAN_COMPILE_SSE2) + if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test)) + i_capabilities |= CPU_CAPABILITY_SSE2; +# endif + +# if defined (__SSE3__) + i_capabilities |= CPU_CAPABILITY_SSE3; +# elif defined (CAN_COMPILE_SSE3) + if ((i_ecx & 0x00000001) && vlc_CPU_check ("SSE3", SSE3_test)) + i_capabilities |= CPU_CAPABILITY_SSE3; +# endif + +# if defined (__SSSE3__) + i_capabilities |= CPU_CAPABILITY_SSSE3; +# elif defined (CAN_COMPILE_SSSE3) + if ((i_ecx & 0x00000200) && vlc_CPU_check ("SSSE3", SSSE3_test)) + i_capabilities |= CPU_CAPABILITY_SSSE3; +# endif + +# if defined (__SSE4_1__) + i_capabilities |= CPU_CAPABILITY_SSE4_1; +# elif defined (CAN_COMPILE_SSE4_1) + if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test)) + i_capabilities |= CPU_CAPABILITY_SSE4_1; +# endif + +# if defined (__SSE4_2__) + i_capabilities |= CPU_CAPABILITY_SSE4_2; +# elif defined (CAN_COMPILE_SSE4_2) + if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test)) + i_capabilities |= CPU_CAPABILITY_SSE4_2; +# endif /* test for additional capabilities */ cpuid( 0x80000000 ); if( i_eax < 0x80000001 ) - { -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - signal( SIGILL, pf_sigill ); -# endif - return i_capabilities; - } + goto out; /* list these additional capabilities */ cpuid( 0x80000001 ); -# ifdef CAN_COMPILE_3DNOW - if( i_edx & 0x80000000 ) - { - psz_capability = "3D Now!"; - i_illegal = 0; - - if( setjmp( env ) == 0 ) - { - /* Test a 3D Now! instruction */ - __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : ); - } - - if( i_illegal == 0 ) - { - i_capabilities |= CPU_CAPABILITY_3DNOW; - } - } -# endif +# if defined (__3dNOW__) + i_capabilities |= CPU_CAPABILITY_3DNOW; +# elif defined (CAN_COMPILE_3DNOW) + if ((i_edx & 0x80000000) && vlc_CPU_check ("3D Now!", ThreeD_Now_test)) + i_capabilities |= CPU_CAPABILITY_3DNOW; +# endif if( b_amd && ( i_edx & 0x00400000 ) ) { i_capabilities |= CPU_CAPABILITY_MMXEXT; } +out: -# if defined( CAN_COMPILE_SSE ) || defined ( CAN_COMPILE_3DNOW ) - signal( SIGILL, pf_sigill ); -# endif - return i_capabilities; +#elif defined (__arm__) -#elif defined( __powerpc__ ) +# if defined (__ARM_NEON__) + i_capabilities |= CPU_CAPABILITY_NEON; +# elif defined (CAN_COMPILE_NEON) +# define NEED_RUNTIME_CPU_CHECK 1 +# endif -# ifdef CAN_COMPILE_ALTIVEC - sighandler_t pf_sigill = signal( SIGILL, SigHandler ); +# ifdef NEED_RUNTIME_CPU_CHECK +# if defined (__linux__) + FILE *info = fopen ("/proc/cpuinfo", "rt"); + if (info != NULL) + { + char *line = NULL; + size_t linelen = 0; - i_capabilities |= CPU_CAPABILITY_FPU; + while (getline (&line, &linelen, info) != -1) + { + const char *cap; - i_illegal = 0; + if (strncmp (line, "Features\t:", 10)) + continue; - if( setjmp( env ) == 0 ) - { - asm volatile ("mtspr 256, %0\n\t" - "vand %%v0, %%v0, %%v0" - : - : "r" (-1)); + /* TODO: detect other CPU features when we use them */ +# if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__) + cap = strstr (line + 10, " neon"); + if (cap != NULL && (cap[5] == '\0' || cap[5] == ' ')) + i_capabilities |= CPU_CAPABILITY_NEON; +# endif + break; + } + fclose (info); + free (line); } +# else +# warning Run-time CPU detection missing: optimizations disabled! +# endif +# endif + +#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \ + || defined( __ppc64__ ) + +# if defined(__APPLE__) || defined(__OpenBSD__) +# if defined(__OpenBSD__) + int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC }; +# else + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; +# endif + int i_has_altivec = 0; + size_t i_length = sizeof( i_has_altivec ); + int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0); - if( i_illegal == 0 ) - { + if( i_error == 0 && i_has_altivec != 0 ) i_capabilities |= CPU_CAPABILITY_ALTIVEC; - } - signal( SIGILL, pf_sigill ); -# endif - - return i_capabilities; +# elif defined( CAN_COMPILE_ALTIVEC ) + if (vlc_CPU_check ("Altivec", Altivec_test)) + i_capabilities |= CPU_CAPABILITY_ALTIVEC; -#elif defined( __sparc__ ) +# endif - i_capabilities |= CPU_CAPABILITY_FPU; - return i_capabilities; +#endif -#else - /* default behaviour */ - return i_capabilities; + cpu_flags = i_capabilities; +} +/** + * Retrieves pre-computed CPU capability flags + */ +unsigned vlc_CPU (void) +{ +/* On Windows and OS/2, + * initialized from DllMain() and _DLL_InitTerm() respectively, instead */ +#if !defined(WIN32) && !defined(__OS2__) + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once (&once, vlc_CPU_init); #endif + return cpu_flags; } -/***************************************************************************** - * SigHandler: system signal handler - ***************************************************************************** - * This function is called when an illegal instruction signal is received by - * the program. We use this function to test OS and CPU capabilities - *****************************************************************************/ -static void SigHandler( int i_signal ) +void vlc_CPU_dump (vlc_object_t *obj) { - /* Acknowledge the signal received */ - i_illegal = 1; + const unsigned flags = vlc_CPU(); + char buf[200], *p = buf; + +#define PRINT_CAPABILITY( capability, string ) \ + if (flags & (capability)) \ + p += sprintf (p, "%s ", (string) ) + +#if defined (__i386__) || defined (__x86_64__) + PRINT_CAPABILITY(CPU_CAPABILITY_MMX, "MMX"); + PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!"); + PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2"); + PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A"); + +#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__) + PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC, "AltiVec"); + +#elif defined (__arm__) + PRINT_CAPABILITY(CPU_CAPABILITY_NEON, "NEONv1"); -#ifdef HAVE_SIGRELSE - sigrelse( i_signal ); #endif -#if defined( __i386__ ) - fprintf( stderr, "warning: your CPU has %s instructions, but not your " - "operating system.\n", psz_capability ); - fprintf( stderr, " some optimizations will be disabled unless " - "you upgrade your OS\n" ); -# if defined( SYS_LINUX ) - fprintf( stderr, " (for instance Linux kernel 2.4.x or later)\n" ); -# endif +#if HAVE_FPU + p += sprintf (p, "FPU "); #endif - longjmp( env, 1 ); + if (p > buf) + msg_Dbg (obj, "CPU has capabilities %s", buf); } + +static vlc_memcpy_t pf_vlc_memcpy = memcpy; + +void vlc_fastmem_register (vlc_memcpy_t cpy) +{ + assert (cpy != NULL); + pf_vlc_memcpy = cpy; +} + +/** + * vlc_memcpy: fast CPU-dependent memcpy + */ +void *vlc_memcpy (void *tgt, const void *src, size_t n) +{ + return pf_vlc_memcpy (tgt, src, n); +}