1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.edu>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
36 #include <sys/types.h>
#include <errno.h>
47 #include <sys/sysctl.h>
/* CPU capability bitmask; assigned once at the end of vlc_CPU_init(). */
52 static uint32_t cpu_flags;
54 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
55 || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
56 # if !defined( WIN32 ) && !defined( __OS2__ )
/**
 * Waits for a probe child process and reports whether it exited cleanly.
 *
 * The check_capability() macro runs one instruction of the tested extension
 * in a separate process and passes that process id here.
 *
 * @param psz_capability human-readable name of the instruction set, used
 *                       only in the warning printed on failure
 * @param pid            process id of the probe child; -1 is treated as
 *                       "no child" and reported as failure
 * @return true if the child exited normally with status 0, false otherwise
 */
static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
    int status;

    if( pid == -1 )
        return false; /* fail safe :-/ */

    /* Retry only when the wait was interrupted by a signal. Any other
     * failure (e.g. ECHILD because SIGCHLD is ignored by the host process,
     * or pid is not one of our children) can never succeed on retry, so
     * looping on it would spin forever: give up and report "unsupported". */
    while( waitpid( pid, &status, 0 ) == -1 )
    {
        if( errno != EINTR )
            return false;
    }

    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
        return true;

    /* The child was killed (typically SIGILL) or exited with an error. */
    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
                     "operating system.\n", psz_capability );
    fprintf( stderr, " some optimizations will be disabled unless "
                     "you upgrade your OS\n" );
    return false;
}
/* check_capability(name, flag, code, input):
 * The visible part of the expansion restores the default SIGILL handler,
 * executes the inline-assembly string `code` with `input` as its input
 * operands, and then ORs `flag` into the caller's local `i_capabilities`
 * when check_OS_capability(name, pid) returns true.
 * NOTE(review): the lines that create `pid` are not visible in this excerpt;
 * presumably the assembly runs in a child process - confirm. */
76 # define check_capability(name, flag, code, input) \
81 signal(SIGILL, SIG_DFL); \
82 __asm__ __volatile__ ( code : : input ); \
85 if( check_OS_capability((name), pid )) \
86 i_capabilities |= (flag); \
89 # else /* WIN32 || __OS2__ */
/* WIN32 / OS/2 variant: no run-time probe, the flag is set unconditionally;
 * `name`, `code` and `input` are ignored. */
90 # define check_capability(name, flag, code, input) \
91 i_capabilities |= (flag);
96 * Determines the CPU capabilities and stores them in cpu_flags.
97 * The result can be retrieved with vlc_CPU().
/* Detects the capabilities of the CPU for the architecture this file is
 * compiled for (x86, ARM or PowerPC branches below), accumulates them in the
 * local bitmask i_capabilities, and finally stores that mask in cpu_flags.
 * Takes no parameters and returns nothing. */
99 void vlc_CPU_init (void)
101 uint32_t i_capabilities = 0;
103 #if defined( __i386__ ) || defined( __x86_64__ )
104 unsigned int i_eax, i_ebx, i_ecx, i_edx;
107 /* Needed for x86 CPU capabilities detection */
/* Two cpuid() variants follow. The i386 PIC one swaps EBX with a scratch
 * register around the instruction (xchgl) and returns EBX through "=r"
 * instead of "=b" - presumably because EBX is reserved in PIC code. */
108 # if defined (__i386__) && defined (__PIC__)
109 # define cpuid(reg) \
110 asm volatile ("xchgl %%ebx,%1\n\t" \
112 "xchgl %%ebx,%1\n\t" \
113 : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
117 # define cpuid(reg) \
118 asm volatile ("cpuid\n\t" \
119 : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
123 /* Check if the OS really supports the requested instructions */
124 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
125 && !defined (__i686__) && !defined (__pentium4__) \
126 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
127 /* check if cpuid instruction is supported */
128 asm volatile ( "push %%ebx\n\t"
131 "movl %%eax, %%ebx\n\t"
132 "xorl $0x200000, %%eax\n\t"
148 /* the CPU supports the CPUID instruction - get its level */
151 # if defined (__i386__) && !defined (__i586__) \
152 && !defined (__i686__) && !defined (__pentium4__) \
153 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
158 /* borrowed from mpeg2dec */
/* The three constants are the little-endian ASCII words "Auth", "cAMD" and
 * "enti": in EBX, EDX, ECX order they spell the vendor string
 * "AuthenticAMD". */
159 b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
160 && ( i_edx == 0x69746e65 );
162 /* test for the MMX flag */
164 # if !defined (__MMX__)
165 if( ! (i_edx & 0x00800000) )
168 i_capabilities |= CPU_CAPABILITY_MMX;
170 # if defined (__SSE__)
171 i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
173 if( i_edx & 0x02000000 )
175 i_capabilities |= CPU_CAPABILITY_MMXEXT;
177 # ifdef CAN_COMPILE_SSE
178 check_capability( "SSE", CPU_CAPABILITY_SSE,
179 "xorps %%xmm0,%%xmm0\n", );
/* For each extension below: if the compiler already targets it (__SSEx__
 * defined) the flag is set directly; otherwise, when the matching
 * CAN_COMPILE_* macro is defined, the CPUID bit is tested and the
 * instruction is tried through check_capability(). */
184 # if defined (__SSE2__)
185 i_capabilities |= CPU_CAPABILITY_SSE2;
186 # elif defined (CAN_COMPILE_SSE2)
187 if( i_edx & 0x04000000 )
188 check_capability( "SSE2", CPU_CAPABILITY_SSE2,
189 "movupd %%xmm0, %%xmm0\n", );
192 # if defined (__SSE3__)
193 i_capabilities |= CPU_CAPABILITY_SSE3;
194 # elif defined (CAN_COMPILE_SSE3)
195 if( i_ecx & 0x00000001 )
196 check_capability( "SSE3", CPU_CAPABILITY_SSE3,
197 "movsldup %%xmm1, %%xmm0\n", );
200 # if defined (__SSSE3__)
201 i_capabilities |= CPU_CAPABILITY_SSSE3;
202 # elif defined (CAN_COMPILE_SSSE3)
203 if( i_ecx & 0x00000200 )
204 check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
205 "pabsw %%xmm1, %%xmm0\n", );
208 # if defined (__SSE4_1__)
209 i_capabilities |= CPU_CAPABILITY_SSE4_1;
210 # elif defined (CAN_COMPILE_SSE4_1)
211 if( i_ecx & 0x00080000 )
212 check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
213 "pmaxsb %%xmm1, %%xmm0\n", );
216 # if defined (__SSE4_2__)
217 i_capabilities |= CPU_CAPABILITY_SSE4_2;
218 # elif defined (CAN_COMPILE_SSE4_2)
219 if( i_ecx & 0x00100000 )
220 check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
221 "pcmpgtq %%xmm1, %%xmm0\n", );
224 /* test for additional capabilities */
227 if( i_eax < 0x80000001 )
230 /* list these additional capabilities */
233 # if defined (__3dNOW__)
234 i_capabilities |= CPU_CAPABILITY_3DNOW;
235 # elif defined (CAN_COMPILE_3DNOW)
236 if( i_edx & 0x80000000 )
237 check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
238 "pfadd %%mm0,%%mm0\n" "femms\n", );
241 if( b_amd && ( i_edx & 0x00400000 ) )
243 i_capabilities |= CPU_CAPABILITY_MMXEXT;
247 #elif defined (__arm__)
249 # if defined (__ARM_NEON__)
250 i_capabilities |= CPU_CAPABILITY_NEON;
251 # elif defined (CAN_COMPILE_NEON)
252 # define NEED_RUNTIME_CPU_CHECK 1
255 # ifdef NEED_RUNTIME_CPU_CHECK
256 # if defined (__linux__)
/* Linux run-time check: read /proc/cpuinfo line by line, keep only the line
 * starting with "Features\t:" and look for a " neon" token that is followed
 * by a space or the end of the string. */
257 FILE *info = fopen ("/proc/cpuinfo", "rt");
263 while (getline (&line, &linelen, info) != -1)
267 if (strncmp (line, "Features\t:", 10))
270 /* TODO: detect other CPU features when we use them */
271 # if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__)
272 cap = strstr (line + 10, " neon");
273 if (cap != NULL && (cap[5] == '\0' || cap[5] == ' '))
274 i_capabilities |= CPU_CAPABILITY_NEON;
282 # warning Run-time CPU detection missing: optimizations disabled!
286 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
287 || defined( __ppc64__ )
289 # if defined(__APPLE__) || defined(__OpenBSD__)
/* Apple / OpenBSD: query the kernel through sysctl(); AltiVec is reported
 * when the call succeeds and returns a non-zero value. */
290 # if defined(__OpenBSD__)
291 int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
293 int selectors[2] = { CTL_HW, HW_VECTORUNIT };
295 int i_has_altivec = 0;
296 size_t i_length = sizeof( i_has_altivec );
297 int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
299 if( i_error == 0 && i_has_altivec != 0 )
300 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
302 # elif defined( CAN_COMPILE_ALTIVEC )
303 check_capability( "Altivec", CPU_CAPABILITY_ALTIVEC,
305 "vand %%v0, %%v0, %%v0",
/* Publish the detected capabilities for later retrieval. */
312 cpu_flags = i_capabilities;
316 * Retrieves pre-computed CPU capability flags
/* Accessor for the CPU capability flags. Outside Windows and OS/2 it first
 * makes sure vlc_CPU_init() has been run, via pthread_once().
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns cpu_flags - confirm. */
318 unsigned vlc_CPU (void)
320 /* On Windows and OS/2,
321 * initialized from DllMain() and _DLL_InitTerm() respectively, instead */
322 #if !defined(WIN32) && !defined(__OS2__)
/* The static once-control makes pthread_once() call vlc_CPU_init() only on
 * the first invocation. */
323 static pthread_once_t once = PTHREAD_ONCE_INIT;
324 pthread_once (&once, vlc_CPU_init);
/* Writes the names of the detected CPU capabilities into a local buffer,
 * each followed by a space, and logs the list through msg_Dbg().
 * obj: object handed to msg_Dbg() as the logging context. */
329 void vlc_CPU_dump (vlc_object_t *obj)
331 const unsigned flags = vlc_CPU();
/* p is the write cursor into buf; each sprintf() advances it by the number
 * of characters written. */
332 char buf[200], *p = buf;
/* PRINT_CAPABILITY: append "<string> " at p when `capability` is set in
 * flags. */
334 #define PRINT_CAPABILITY( capability, string ) \
335 if (flags & (capability)) \
336 p += sprintf (p, "%s ", (string) )
338 #if defined (__i386__) || defined (__x86_64__)
339 PRINT_CAPABILITY(CPU_CAPABILITY_MMX, "MMX");
340 PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
341 PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT");
342 PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE");
343 PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
344 PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
345 PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
346 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
347 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
348 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A");
350 #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
351 PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC, "AltiVec");
353 #elif defined (__arm__)
354 PRINT_CAPABILITY(CPU_CAPABILITY_NEON, "NEONv1");
/* NOTE(review): the condition guarding the "FPU " entry and the final log
 * call is not visible in this excerpt. */
359 p += sprintf (p, "FPU ");
363 msg_Dbg (obj, "CPU has capabilities %s", buf);
/* Copy routine used by vlc_memcpy(); initialised to the C library memcpy. */
367 static vlc_memcpy_t pf_vlc_memcpy = memcpy;
/* Registers a replacement copy routine.
 * cpy: the routine to register; must not be NULL (checked with assert()).
 * NOTE(review): the statement that stores cpy is not visible in this
 * excerpt; presumably it is assigned to pf_vlc_memcpy - confirm. */
369 void vlc_fastmem_register (vlc_memcpy_t cpy)
371 assert (cpy != NULL);
376 * vlc_memcpy: fast CPU-dependent memcpy
/* Copies n bytes from src to tgt by forwarding the three arguments to the
 * routine currently held in pf_vlc_memcpy, and returns that routine's
 * result. */
378 void *vlc_memcpy (void *tgt, const void *src, size_t n)
380 return pf_vlc_memcpy (tgt, src, n);