]> git.sesse.net Git - x264/blob - common/win32thread.c
x86inc: Fix AVX emulation of scalar float instructions
[x264] / common / win32thread.c
1 /*****************************************************************************
2  * win32thread.c: windows threading
3  *****************************************************************************
4  * Copyright (C) 2010-2016 x264 project
5  *
6  * Authors: Steven Walters <kemuri9@gmail.com>
7  *          Pegasys Inc. <http://www.pegasys-inc.com>
8  *          Henrik Gramner <henrik@gramner.com>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23  *
24  * This program is also available under a commercial proprietary license.
25  * For more information, contact us at licensing@x264.com.
26  *****************************************************************************/
27
28 /* Microsoft's way of supporting systems with >64 logical cpus can be found at
29  * http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */
30
31 /* Based on the agreed standing that x264 does not need to utilize >64 logical cpus,
32  * this API does not detect nor utilize more than 64 cpus for systems that have them. */
33
34 #include "common.h"
35
36 #if HAVE_WINRT
37 /* _beginthreadex() is technically the correct option, but it's only available for Desktop applications.
38  * Using CreateThread() as an alternative works on Windows Store and Windows Phone 8.1+ as long as we're
39  * using a dynamically linked MSVCRT which happens to be a requirement for WinRT applications anyway */
40 #define _beginthreadex CreateThread
41 #define InitializeCriticalSectionAndSpinCount(a, b) InitializeCriticalSectionEx(a, b, CRITICAL_SECTION_NO_DEBUG_INFO)
42 #define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
43 #else
44 #include <process.h>
45 #endif
46
/* Spin count handed to InitializeCriticalSectionAndSpinCount() by x264_pthread_mutex_init():
 * number of times to spin a thread about to block on a locked mutex before retrying and
 * sleeping if still locked */
#define X264_SPIN_COUNT 0

/* global mutex used by x264_pthread_mutex_lock() to handle mutexes that still hold
 * X264_PTHREAD_MUTEX_INITIALIZER; initialised by x264_win32_threading_init() and
 * deleted by x264_win32_threading_destroy() */
static x264_pthread_mutex_t static_mutex;
52
53 /* _beginthreadex requires that the start routine is __stdcall */
54 static unsigned __stdcall x264_win32thread_worker( void *arg )
55 {
56     x264_pthread_t *h = arg;
57     *h->p_ret = h->func( h->arg );
58     return 0;
59 }
60
61 int x264_pthread_create( x264_pthread_t *thread, const x264_pthread_attr_t *attr,
62                          void *(*start_routine)( void* ), void *arg )
63 {
64     thread->func   = start_routine;
65     thread->arg    = arg;
66     thread->p_ret  = &thread->ret;
67     thread->ret    = NULL;
68     thread->handle = (void*)_beginthreadex( NULL, 0, x264_win32thread_worker, thread, 0, NULL );
69     return !thread->handle;
70 }
71
72 int x264_pthread_join( x264_pthread_t thread, void **value_ptr )
73 {
74     DWORD ret = WaitForSingleObject( thread.handle, INFINITE );
75     if( ret != WAIT_OBJECT_0 )
76         return -1;
77     if( value_ptr )
78         *value_ptr = *thread.p_ret;
79     CloseHandle( thread.handle );
80     return 0;
81 }
82
83 int x264_pthread_mutex_init( x264_pthread_mutex_t *mutex, const x264_pthread_mutexattr_t *attr )
84 {
85     return !InitializeCriticalSectionAndSpinCount( mutex, X264_SPIN_COUNT );
86 }
87
88 int x264_pthread_mutex_destroy( x264_pthread_mutex_t *mutex )
89 {
90     DeleteCriticalSection( mutex );
91     return 0;
92 }
93
94 int x264_pthread_mutex_lock( x264_pthread_mutex_t *mutex )
95 {
96     static const x264_pthread_mutex_t init = X264_PTHREAD_MUTEX_INITIALIZER;
97     if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) )
98         *mutex = static_mutex;
99     EnterCriticalSection( mutex );
100     return 0;
101 }
102
103 int x264_pthread_mutex_unlock( x264_pthread_mutex_t *mutex )
104 {
105     LeaveCriticalSection( mutex );
106     return 0;
107 }
108
109 void x264_win32_threading_destroy( void )
110 {
111     x264_pthread_mutex_destroy( &static_mutex );
112     memset( &static_mutex, 0, sizeof(static_mutex) );
113 }
114
115 #if HAVE_WINRT
116 int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
117 {
118     InitializeConditionVariable( cond );
119     return 0;
120 }
121
122 int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
123 {
124     return 0;
125 }
126
127 int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
128 {
129     WakeAllConditionVariable( cond );
130     return 0;
131 }
132
133 int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
134 {
135     WakeConditionVariable( cond );
136     return 0;
137 }
138
139 int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
140 {
141     return !SleepConditionVariableCS( cond, mutex, INFINITE );
142 }
143
144 int x264_win32_threading_init( void )
145 {
146     return x264_pthread_mutex_init( &static_mutex, NULL );
147 }
148
149 int x264_pthread_num_processors_np( void )
150 {
151     SYSTEM_INFO si;
152     GetNativeSystemInfo(&si);
153     return si.dwNumberOfProcessors;
154 }
155
156 #else
157
158 static struct
159 {
160     /* function pointers to conditional variable API on windows 6.0+ kernels */
161     void (WINAPI *cond_broadcast)( x264_pthread_cond_t *cond );
162     void (WINAPI *cond_init)( x264_pthread_cond_t *cond );
163     void (WINAPI *cond_signal)( x264_pthread_cond_t *cond );
164     BOOL (WINAPI *cond_wait)( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex, DWORD milliseconds );
165 } thread_control;
166
167 /* for pre-Windows 6.0 platforms we need to define and use our own condition variable and api */
168 typedef struct
169 {
170     x264_pthread_mutex_t mtx_broadcast;
171     x264_pthread_mutex_t mtx_waiter_count;
172     volatile int waiter_count;
173     HANDLE semaphore;
174     HANDLE waiters_done;
175     volatile int is_broadcast;
176 } x264_win32_cond_t;
177
178 int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
179 {
180     if( thread_control.cond_init )
181     {
182         thread_control.cond_init( cond );
183         return 0;
184     }
185
186     /* non native condition variables */
187     x264_win32_cond_t *win32_cond = calloc( 1, sizeof(x264_win32_cond_t) );
188     if( !win32_cond )
189         return -1;
190     cond->Ptr = win32_cond;
191     win32_cond->semaphore = CreateSemaphoreW( NULL, 0, 0x7fffffff, NULL );
192     if( !win32_cond->semaphore )
193         return -1;
194
195     if( x264_pthread_mutex_init( &win32_cond->mtx_waiter_count, NULL ) )
196         return -1;
197     if( x264_pthread_mutex_init( &win32_cond->mtx_broadcast, NULL ) )
198         return -1;
199
200     win32_cond->waiters_done = CreateEventW( NULL, FALSE, FALSE, NULL );
201     if( !win32_cond->waiters_done )
202         return -1;
203
204     return 0;
205 }
206
207 int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
208 {
209     /* native condition variables do not destroy */
210     if( thread_control.cond_init )
211         return 0;
212
213     /* non native condition variables */
214     x264_win32_cond_t *win32_cond = cond->Ptr;
215     CloseHandle( win32_cond->semaphore );
216     CloseHandle( win32_cond->waiters_done );
217     x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
218     x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
219     free( win32_cond );
220
221     return 0;
222 }
223
224 int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
225 {
226     if( thread_control.cond_broadcast )
227     {
228         thread_control.cond_broadcast( cond );
229         return 0;
230     }
231
232     /* non native condition variables */
233     x264_win32_cond_t *win32_cond = cond->Ptr;
234     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
235     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
236     int have_waiter = 0;
237
238     if( win32_cond->waiter_count )
239     {
240         win32_cond->is_broadcast = 1;
241         have_waiter = 1;
242     }
243
244     if( have_waiter )
245     {
246         ReleaseSemaphore( win32_cond->semaphore, win32_cond->waiter_count, NULL );
247         x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
248         WaitForSingleObject( win32_cond->waiters_done, INFINITE );
249         win32_cond->is_broadcast = 0;
250     }
251     else
252         x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
253     return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
254 }
255
256 int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
257 {
258     if( thread_control.cond_signal )
259     {
260         thread_control.cond_signal( cond );
261         return 0;
262     }
263
264     /* non-native condition variables */
265     x264_win32_cond_t *win32_cond = cond->Ptr;
266
267     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
268     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
269     int have_waiter = win32_cond->waiter_count;
270     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
271
272     if( have_waiter )
273     {
274         ReleaseSemaphore( win32_cond->semaphore, 1, NULL );
275         WaitForSingleObject( win32_cond->waiters_done, INFINITE );
276     }
277
278     return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
279 }
280
281 int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
282 {
283     if( thread_control.cond_wait )
284         return !thread_control.cond_wait( cond, mutex, INFINITE );
285
286     /* non native condition variables */
287     x264_win32_cond_t *win32_cond = cond->Ptr;
288
289     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
290     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
291     win32_cond->waiter_count++;
292     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
293     x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
294
295     // unlock the external mutex
296     x264_pthread_mutex_unlock( mutex );
297     WaitForSingleObject( win32_cond->semaphore, INFINITE );
298
299     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
300     win32_cond->waiter_count--;
301     int last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
302     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
303
304     if( last_waiter )
305         SetEvent( win32_cond->waiters_done );
306
307     // lock the external mutex
308     return x264_pthread_mutex_lock( mutex );
309 }
310
311 int x264_win32_threading_init( void )
312 {
313     /* find function pointers to API functions, if they exist */
314     HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
315     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
316     if( thread_control.cond_init )
317     {
318         /* we're on a windows 6.0+ kernel, acquire the rest of the functions */
319         thread_control.cond_broadcast = (void*)GetProcAddress( kernel_dll, "WakeAllConditionVariable" );
320         thread_control.cond_signal = (void*)GetProcAddress( kernel_dll, "WakeConditionVariable" );
321         thread_control.cond_wait = (void*)GetProcAddress( kernel_dll, "SleepConditionVariableCS" );
322     }
323     return x264_pthread_mutex_init( &static_mutex, NULL );
324 }
325
326 int x264_pthread_num_processors_np( void )
327 {
328     DWORD_PTR system_cpus, process_cpus = 0;
329     int cpus = 0;
330
331     /* GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple processor groups.
332      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
333 #if ARCH_X86_64
334     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
335     HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
336     BOOL (*get_thread_affinity)( HANDLE thread, void *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
337     if( get_thread_affinity )
338     {
339         /* running on a platform that supports >64 logical cpus */
340         struct /* GROUP_AFFINITY */
341         {
342             ULONG_PTR mask; // KAFFINITY = ULONG_PTR
343             USHORT group;
344             USHORT reserved[3];
345         } thread_affinity;
346         if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
347             process_cpus = thread_affinity.mask;
348     }
349 #endif
350     if( !process_cpus )
351         GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus );
352     for( DWORD_PTR bit = 1; bit; bit <<= 1 )
353         cpus += !!(process_cpus & bit);
354
355     return cpus ? cpus : 1;
356 }
357 #endif