1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.edu>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
36 #include <sys/types.h>
47 #include <sys/sysctl.h>
/* Cached CPU capability bit mask; assigned from i_capabilities at the end of
 * vlc_CPU_init(). */
52 static uint32_t cpu_flags;
54 #if defined (__i386__) || defined (__x86_64__) || defined (__powerpc__) \
55 || defined (__ppc__) || defined (__ppc64__) || defined (__powerpc64__)
56 # if !defined (WIN32) && !defined (__OS2__)
/*
 * Run-time probe: checks whether the instructions exercised by func can
 * actually be executed, and warns on stderr (naming the instruction set with
 * name) when they cannot.
 *
 * From the lines visible here: the SIGILL disposition is reset to its
 * default, a child process identified by pid is waited for, and a normal
 * exit with status 0 is the condition tested before the warning text.
 * NOTE(review): the lines that create the child and invoke func are not
 * visible in this excerpt -- presumably fork() followed by func(); confirm.
 */
57 static bool vlc_CPU_check (const char *name, void (*func) (void))
/* Default SIGILL action, so an unsupported instruction terminates the
 * process executing it instead of running a custom handler. */
64 signal (SIGILL, SIG_DFL);
66 //__asm__ __volatile__ ( code : : input );
71 //i_capabilities |= (flag);
/* NOTE(review): this retries on every -1 return, not only on EINTR; a
 * persistent failure such as ECHILD would make the loop spin forever --
 * consider checking errno. */
74 while( waitpid( pid, &status, 0 ) == -1 );
/* Child exited normally with status 0. */
76 if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
79 fprintf (stderr, "Warning: your CPU has %s instructions, but not your "
80 "operating system.\n", name);
81 fprintf( stderr, " some optimizations will be disabled unless "
82 "you upgrade your OS\n" );
86 #if defined (CAN_COMPILE_SSE) && !defined (__SSE__)
/* SSE probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a single
 * SSE instruction (xorps). Only built when CAN_COMPILE_SSE is defined and
 * the compiler does not already target SSE (__SSE__ undefined). */
87 VLC_SSE static void SSE_test (void)
89 asm volatile ("xorps %%xmm0,%%xmm0\n" : : : "xmm0", "xmm1");
92 #if defined (CAN_COMPILE_SSE2) && !defined (__SSE2__)
/* SSE2 probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a single
 * SSE2 instruction (movupd). Only built when CAN_COMPILE_SSE2 is defined and
 * __SSE2__ is not. */
93 VLC_SSE static void SSE2_test (void)
95 asm volatile ("movupd %%xmm0, %%xmm0\n" : : : "xmm0", "xmm1");
98 #if defined (CAN_COMPILE_SSE3) && !defined (__SSE3__)
/* SSE3 probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a single
 * SSE3 instruction (movsldup). Only built when CAN_COMPILE_SSE3 is defined
 * and __SSE3__ is not. */
99 VLC_SSE static void SSE3_test (void)
101 asm volatile ("movsldup %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
104 #if defined (CAN_COMPILE_SSSE3) && !defined (__SSSE3__)
/* SSSE3 probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a single
 * SSSE3 instruction (pabsw on XMM registers). Only built when
 * CAN_COMPILE_SSSE3 is defined and __SSSE3__ is not. */
105 VLC_SSE static void SSSE3_test (void)
107 asm volatile ("pabsw %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
110 #if defined (CAN_COMPILE_SSE4_1) && !defined (__SSE4_1__)
/* SSE4.1 probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a
 * single SSE4.1 instruction (pmaxsb). Only built when CAN_COMPILE_SSE4_1 is
 * defined and __SSE4_1__ is not. */
111 VLC_SSE static void SSE4_1_test (void)
113 asm volatile ("pmaxsb %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
116 #if defined (CAN_COMPILE_SSE4_2) && !defined (__SSE4_2__)
/* SSE4.2 probe passed to vlc_CPU_check() by vlc_CPU_init(): executes a
 * single SSE4.2 instruction (pcmpgtq). Only built when CAN_COMPILE_SSE4_2 is
 * defined and __SSE4_2__ is not. */
117 VLC_SSE static void SSE4_2_test (void)
119 asm volatile ("pcmpgtq %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
122 #if defined (CAN_COMPILE_3DNOW) && !defined (__3dNOW__)
/* 3DNow! probe passed to vlc_CPU_check() by vlc_CPU_init(): executes one
 * 3DNow! arithmetic instruction (pfadd) and then femms to leave the MMX
 * state. Only built when CAN_COMPILE_3DNOW is defined and __3dNOW__ is not. */
123 VLC_MMX static void ThreeD_Now_test (void)
125 asm volatile ("pfadd %%mm0,%%mm0\n" "femms\n" : : : "mm0");
129 #if defined (CAN_COMPILE_ALTIVEC)
/* AltiVec probe handed to vlc_CPU_check(): writes all-ones to SPR 256
 * (VRSAVE) and then executes one AltiVec instruction (vand).
 * The function must be named Altivec_test: that is the identifier
 * vlc_CPU_init() passes to vlc_CPU_check(), and it matches the naming of
 * the other *_test probes in this file. */
130 static void Altivec_test (void)
132 asm volatile ("mtspr 256, %0\n" "vand %%v0, %%v0, %%v0\n" : : "r" (-1));
136 #else /* WIN32 || __OS2__ */
/* WIN32 / OS/2 build: no run-time probe is performed; the check always
 * evaluates to 1 and neither argument is used. */
137 # define vlc_CPU_check(name, func) (1)
142 * Determines the CPU capabilities and stores them in cpu_flags.
143 * The result can be retrieved with vlc_CPU().
/*
 * Builds the CPU capability mask in i_capabilities, one architecture branch
 * at a time (x86, ARM, PowerPC), and finally stores it in cpu_flags.
 * A capability is set unconditionally when the compiler already targets it
 * (__SSE2__, __ARM_NEON__, ...); otherwise it is set only after a hardware
 * flag test and, where a probe exists, a successful vlc_CPU_check().
 * Takes no arguments and returns nothing.
 */
145 void vlc_CPU_init (void)
147 uint32_t i_capabilities = 0;
149 #if defined( __i386__ ) || defined( __x86_64__ )
150 unsigned int i_eax, i_ebx, i_ecx, i_edx;
153 /* Needed for x86 CPU capabilities detection */
154 # if defined (__i386__) && defined (__PIC__)
155 # define cpuid(reg) \
156 asm volatile ("xchgl %%ebx,%1\n\t" \
158 "xchgl %%ebx,%1\n\t" \
159 : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
163 # define cpuid(reg) \
164 asm volatile ("cpuid\n\t" \
165 : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
169 /* Check if the OS really supports the requested instructions */
170 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
171 && !defined (__i686__) && !defined (__pentium4__) \
172 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
173 /* check if cpuid instruction is supported */
174 asm volatile ( "push %%ebx\n\t"
177 "movl %%eax, %%ebx\n\t"
178 "xorl $0x200000, %%eax\n\t"
194 /* the CPU supports the CPUID instruction - get its level */
197 # if defined (__i386__) && !defined (__i586__) \
198 && !defined (__i686__) && !defined (__pentium4__) \
199 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
204 /* borrowed from mpeg2dec */
/* The three constants are the little-endian ASCII words "Auth", "cAMD" and
 * "enti", i.e. the vendor string "AuthenticAMD". */
205 b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
206 && ( i_edx == 0x69746e65 );
208 /* test for the MMX flag */
210 # if !defined (__MMX__)
/* 0x00800000 is bit 23 of EDX. The statement executed when the bit is clear
 * is not visible in this excerpt. */
211 if( ! (i_edx & 0x00800000) )
214 i_capabilities |= CPU_CAPABILITY_MMX;
216 # if defined (__SSE__)
217 i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
/* 0x02000000 is bit 25 of EDX: MMXEXT is set from the flag alone, SSE only
 * after the run-time probe succeeds as well. */
219 if( i_edx & 0x02000000 )
221 i_capabilities |= CPU_CAPABILITY_MMXEXT;
223 # ifdef CAN_COMPILE_SSE
224 if (vlc_CPU_check ("SSE", SSE_test))
225 i_capabilities |= CPU_CAPABILITY_SSE;
/* The blocks below share one pattern: trust the compiler target macro if it
 * is defined, else require both the CPUID bit and a passing probe. */
230 # if defined (__SSE2__)
231 i_capabilities |= CPU_CAPABILITY_SSE2;
232 # elif defined (CAN_COMPILE_SSE2)
/* EDX bit 26 */
233 if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test))
234 i_capabilities |= CPU_CAPABILITY_SSE2;
237 # if defined (__SSE3__)
238 i_capabilities |= CPU_CAPABILITY_SSE3;
239 # elif defined (CAN_COMPILE_SSE3)
/* ECX bit 0 */
240 if ((i_ecx & 0x00000001) && vlc_CPU_check ("SSE3", SSE3_test))
241 i_capabilities |= CPU_CAPABILITY_SSE3;
244 # if defined (__SSSE3__)
245 i_capabilities |= CPU_CAPABILITY_SSSE3;
246 # elif defined (CAN_COMPILE_SSSE3)
/* ECX bit 9 */
247 if ((i_ecx & 0x00000200) && vlc_CPU_check ("SSSE3", SSSE3_test))
248 i_capabilities |= CPU_CAPABILITY_SSSE3;
251 # if defined (__SSE4_1__)
252 i_capabilities |= CPU_CAPABILITY_SSE4_1;
253 # elif defined (CAN_COMPILE_SSE4_1)
/* ECX bit 19 */
254 if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test))
255 i_capabilities |= CPU_CAPABILITY_SSE4_1;
258 # if defined (__SSE4_2__)
259 i_capabilities |= CPU_CAPABILITY_SSE4_2;
260 # elif defined (CAN_COMPILE_SSE4_2)
/* ECX bit 20 */
261 if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test))
262 i_capabilities |= CPU_CAPABILITY_SSE4_2;
265 /* test for additional capabilities */
/* NOTE(review): presumably i_eax holds the highest extended CPUID leaf at
 * this point (the cpuid call itself is not visible here) -- confirm. */
268 if( i_eax < 0x80000001 )
271 /* list these additional capabilities */
274 # if defined (__3dNOW__)
275 i_capabilities |= CPU_CAPABILITY_3DNOW;
276 # elif defined (CAN_COMPILE_3DNOW)
/* EDX bit 31 */
277 if ((i_edx & 0x80000000) && vlc_CPU_check ("3D Now!", ThreeD_Now_test))
278 i_capabilities |= CPU_CAPABILITY_3DNOW;
/* EDX bit 22 grants MMXEXT, but only when the vendor check matched AMD. */
281 if( b_amd && ( i_edx & 0x00400000 ) )
283 i_capabilities |= CPU_CAPABILITY_MMXEXT;
287 #elif defined (__arm__)
289 # if defined (__ARM_NEON__)
290 i_capabilities |= CPU_CAPABILITY_NEON;
291 # elif defined (CAN_COMPILE_NEON)
292 # define NEED_RUNTIME_CPU_CHECK 1
295 # ifdef NEED_RUNTIME_CPU_CHECK
296 # if defined (__linux__)
/* Linux: scan /proc/cpuinfo line by line for the "Features" entry. */
297 FILE *info = fopen ("/proc/cpuinfo", "rt");
303 while (getline (&line, &linelen, info) != -1)
/* Non-zero strncmp result: this is not the "Features" line. */
307 if (strncmp (line, "Features\t:", 10))
310 /* TODO: detect other CPU features when we use them */
311 # if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__)
/* Look for the whole word " neon": the character after it must end the
 * token. NOTE(review): getline() keeps the trailing newline, so a " neon"
 * that is immediately followed by '\n' matches neither test below; only the
 * first occurrence found by strstr() is examined -- confirm the kernel
 * always emits a trailing space after the last feature. */
312 cap = strstr (line + 10, " neon");
313 if (cap != NULL && (cap[5] == '\0' || cap[5] == ' '))
314 i_capabilities |= CPU_CAPABILITY_NEON;
322 # warning Run-time CPU detection missing: optimizations disabled!
326 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
327 || defined( __ppc64__ )
329 # if defined(__APPLE__) || defined(__OpenBSD__)
/* Apple / OpenBSD: ask the kernel through sysctl() instead of probing; the
 * two platforms use different selector pairs. */
330 # if defined(__OpenBSD__)
331 int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
333 int selectors[2] = { CTL_HW, HW_VECTORUNIT };
335 int i_has_altivec = 0;
336 size_t i_length = sizeof( i_has_altivec );
337 int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
/* AltiVec is reported only if the query succeeded and returned non-zero. */
339 if( i_error == 0 && i_has_altivec != 0 )
340 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
342 # elif defined( CAN_COMPILE_ALTIVEC )
343 if (vlc_CPU_check ("Altivec", Altivec_test))
344 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
/* Publish the result for vlc_CPU(). */
350 cpu_flags = i_capabilities;
354 * Retrieves pre-computed CPU capability flags
/* Accessor for the CPU capability flags. On platforms other than Windows and
 * OS/2 it first makes sure, through pthread_once(), that vlc_CPU_init() has
 * run exactly once.
 * NOTE(review): the return statement is not visible in this excerpt --
 * presumably it returns cpu_flags; confirm. */
356 unsigned vlc_CPU (void)
358 /* On Windows and OS/2,
359 * initialized from DllMain() and _DLL_InitTerm() respectively, instead */
360 #if !defined(WIN32) && !defined(__OS2__)
/* Function-local once-control object guarding the one-time initialization. */
361 static pthread_once_t once = PTHREAD_ONCE_INIT;
362 pthread_once (&once, vlc_CPU_init);
/* Logs the detected CPU capabilities as a space-separated list through
 * msg_Dbg() on obj. The list is assembled in a local 200-byte buffer from
 * the flags returned by vlc_CPU(). */
367 void vlc_CPU_dump (vlc_object_t *obj)
369 const unsigned flags = vlc_CPU();
/* p is the write cursor into buf; each sprintf() advances it. */
370 char buf[200], *p = buf;
/* Appends "<string> " to the buffer when the capability bit is set.
 * NOTE(review): sprintf() is unbounded; the names visible in this function
 * total well under 200 bytes, so it fits today, but snprintf() would make
 * that explicit. The macro also expands to a bare `if` (no do/while(0)
 * wrapper), so it must not be followed by an `else`. */
372 #define PRINT_CAPABILITY( capability, string ) \
373 if (flags & (capability)) \
374 p += sprintf (p, "%s ", (string) )
376 #if defined (__i386__) || defined (__x86_64__)
377 PRINT_CAPABILITY(CPU_CAPABILITY_MMX, "MMX");
378 PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
379 PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT, "MMXEXT");
380 PRINT_CAPABILITY(CPU_CAPABILITY_SSE, "SSE");
381 PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
382 PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
383 PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
384 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
385 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
386 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A");
/* NOTE(review): unlike the PowerPC conditions used for detection elsewhere
 * in this file, this one does not test __powerpc64__ -- confirm whether the
 * omission is intended. */
388 #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
389 PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC, "AltiVec");
391 #elif defined (__arm__)
392 PRINT_CAPABILITY(CPU_CAPABILITY_NEON, "NEONv1");
/* NOTE(review): the condition guarding this line is not visible here. */
397 p += sprintf (p, "FPU ");
401 msg_Dbg (obj, "CPU has capabilities %s", buf);
/* Copy routine that vlc_memcpy() dispatches to; starts out as the standard
 * memcpy. */
405 static vlc_memcpy_t pf_vlc_memcpy = memcpy;
/* Registers a replacement memory-copy routine. cpy must not be NULL (checked
 * with assert()).
 * NOTE(review): the statement that stores cpy is not visible in this
 * excerpt -- presumably it assigns pf_vlc_memcpy; confirm. */
407 void vlc_fastmem_register (vlc_memcpy_t cpy)
409 assert (cpy != NULL);
414 * vlc_memcpy: fast CPU-dependent memcpy
/* Copies n bytes from src to tgt by forwarding to the routine currently held
 * in pf_vlc_memcpy, and returns whatever that routine returns. */
416 void *vlc_memcpy (void *tgt, const void *src, size_t n)
418 return pf_vlc_memcpy (tgt, src, n);