]> git.sesse.net Git - x264/blob - common/win32thread.c
Bump dates to 2016
[x264] / common / win32thread.c
1 /*****************************************************************************
2  * win32thread.c: windows threading
3  *****************************************************************************
4  * Copyright (C) 2010-2016 x264 project
5  *
6  * Authors: Steven Walters <kemuri9@gmail.com>
7  *          Pegasys Inc. <http://www.pegasys-inc.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22  *
23  * This program is also available under a commercial proprietary license.
24  * For more information, contact us at licensing@x264.com.
25  *****************************************************************************/
26
27 /* Microsoft's way of supporting systems with >64 logical cpus can be found at
28  * http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */
29
/* Based on the agreed position that x264 does not need to utilize >64 logical cpus,
 * this API neither detects nor utilizes more than 64 cpus on systems that have them. */
32
33 #include "common.h"
34 #include <process.h>
35
/* spin count handed to InitializeCriticalSectionAndSpinCount: how many times a thread
 * that finds the mutex locked spins and retries before it goes to sleep */
#define X264_SPIN_COUNT 0
38
39 /* GROUP_AFFINITY struct */
40 typedef struct
41 {
42     ULONG_PTR mask; // KAFFINITY = ULONG_PTR
43     USHORT group;
44     USHORT reserved[3];
45 } x264_group_affinity_t;
46
47 typedef struct
48 {
49     /* global mutex for replacing MUTEX_INITIALIZER instances */
50     x264_pthread_mutex_t static_mutex;
51
52     /* function pointers to conditional variable API on windows 6.0+ kernels */
53     void (WINAPI *cond_broadcast)( x264_pthread_cond_t *cond );
54     void (WINAPI *cond_init)( x264_pthread_cond_t *cond );
55     void (WINAPI *cond_signal)( x264_pthread_cond_t *cond );
56     BOOL (WINAPI *cond_wait)( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex, DWORD milliseconds );
57 } x264_win32thread_control_t;
58
59 static x264_win32thread_control_t thread_control;
60
61 /* _beginthreadex requires that the start routine is __stdcall */
62 static unsigned __stdcall x264_win32thread_worker( void *arg )
63 {
64     x264_pthread_t *h = arg;
65     *h->p_ret = h->func( h->arg );
66     return 0;
67 }
68
69 int x264_pthread_create( x264_pthread_t *thread, const x264_pthread_attr_t *attr,
70                          void *(*start_routine)( void* ), void *arg )
71 {
72     thread->func   = start_routine;
73     thread->arg    = arg;
74     thread->p_ret  = &thread->ret;
75     thread->ret    = NULL;
76     thread->handle = (void*)_beginthreadex( NULL, 0, x264_win32thread_worker, thread, 0, NULL );
77     return !thread->handle;
78 }
79
80 int x264_pthread_join( x264_pthread_t thread, void **value_ptr )
81 {
82     DWORD ret = WaitForSingleObject( thread.handle, INFINITE );
83     if( ret != WAIT_OBJECT_0 )
84         return -1;
85     if( value_ptr )
86         *value_ptr = *thread.p_ret;
87     CloseHandle( thread.handle );
88     return 0;
89 }
90
91 int x264_pthread_mutex_init( x264_pthread_mutex_t *mutex, const x264_pthread_mutexattr_t *attr )
92 {
93     return !InitializeCriticalSectionAndSpinCount( mutex, X264_SPIN_COUNT );
94 }
95
96 int x264_pthread_mutex_destroy( x264_pthread_mutex_t *mutex )
97 {
98     DeleteCriticalSection( mutex );
99     return 0;
100 }
101
102 int x264_pthread_mutex_lock( x264_pthread_mutex_t *mutex )
103 {
104     static x264_pthread_mutex_t init = X264_PTHREAD_MUTEX_INITIALIZER;
105     if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) )
106         *mutex = thread_control.static_mutex;
107     EnterCriticalSection( mutex );
108     return 0;
109 }
110
111 int x264_pthread_mutex_unlock( x264_pthread_mutex_t *mutex )
112 {
113     LeaveCriticalSection( mutex );
114     return 0;
115 }
116
117 /* for pre-Windows 6.0 platforms we need to define and use our own condition variable and api */
118 typedef struct
119 {
120     x264_pthread_mutex_t mtx_broadcast;
121     x264_pthread_mutex_t mtx_waiter_count;
122     volatile int waiter_count;
123     HANDLE semaphore;
124     HANDLE waiters_done;
125     volatile int is_broadcast;
126 } x264_win32_cond_t;
127
128 int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
129 {
130     if( thread_control.cond_init )
131     {
132         thread_control.cond_init( cond );
133         return 0;
134     }
135
136     /* non native condition variables */
137     x264_win32_cond_t *win32_cond = calloc( 1, sizeof(x264_win32_cond_t) );
138     if( !win32_cond )
139         return -1;
140     cond->ptr = win32_cond;
141     win32_cond->semaphore = CreateSemaphoreW( NULL, 0, 0x7fffffff, NULL );
142     if( !win32_cond->semaphore )
143         return -1;
144
145     if( x264_pthread_mutex_init( &win32_cond->mtx_waiter_count, NULL ) )
146         return -1;
147     if( x264_pthread_mutex_init( &win32_cond->mtx_broadcast, NULL ) )
148         return -1;
149
150     win32_cond->waiters_done = CreateEventW( NULL, FALSE, FALSE, NULL );
151     if( !win32_cond->waiters_done )
152         return -1;
153
154     return 0;
155 }
156
157 int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
158 {
159     /* native condition variables do not destroy */
160     if( thread_control.cond_init )
161         return 0;
162
163     /* non native condition variables */
164     x264_win32_cond_t *win32_cond = cond->ptr;
165     CloseHandle( win32_cond->semaphore );
166     CloseHandle( win32_cond->waiters_done );
167     x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
168     x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
169     free( win32_cond );
170
171     return 0;
172 }
173
174 int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
175 {
176     if( thread_control.cond_broadcast )
177     {
178         thread_control.cond_broadcast( cond );
179         return 0;
180     }
181
182     /* non native condition variables */
183     x264_win32_cond_t *win32_cond = cond->ptr;
184     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
185     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
186     int have_waiter = 0;
187
188     if( win32_cond->waiter_count )
189     {
190         win32_cond->is_broadcast = 1;
191         have_waiter = 1;
192     }
193
194     if( have_waiter )
195     {
196         ReleaseSemaphore( win32_cond->semaphore, win32_cond->waiter_count, NULL );
197         x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
198         WaitForSingleObject( win32_cond->waiters_done, INFINITE );
199         win32_cond->is_broadcast = 0;
200     }
201     else
202         x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
203     return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
204 }
205
206 int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
207 {
208     if( thread_control.cond_signal )
209     {
210         thread_control.cond_signal( cond );
211         return 0;
212     }
213
214     /* non-native condition variables */
215     x264_win32_cond_t *win32_cond = cond->ptr;
216
217     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
218     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
219     int have_waiter = win32_cond->waiter_count;
220     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
221
222     if( have_waiter )
223     {
224         ReleaseSemaphore( win32_cond->semaphore, 1, NULL );
225         WaitForSingleObject( win32_cond->waiters_done, INFINITE );
226     }
227
228     return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
229 }
230
231 int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
232 {
233     if( thread_control.cond_wait )
234         return !thread_control.cond_wait( cond, mutex, INFINITE );
235
236     /* non native condition variables */
237     x264_win32_cond_t *win32_cond = cond->ptr;
238
239     x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
240     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
241     win32_cond->waiter_count++;
242     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
243     x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
244
245     // unlock the external mutex
246     x264_pthread_mutex_unlock( mutex );
247     WaitForSingleObject( win32_cond->semaphore, INFINITE );
248
249     x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
250     win32_cond->waiter_count--;
251     int last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
252     x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
253
254     if( last_waiter )
255         SetEvent( win32_cond->waiters_done );
256
257     // lock the external mutex
258     return x264_pthread_mutex_lock( mutex );
259 }
260
261 int x264_win32_threading_init( void )
262 {
263     /* find function pointers to API functions, if they exist */
264     HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
265     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
266     if( thread_control.cond_init )
267     {
268         /* we're on a windows 6.0+ kernel, acquire the rest of the functions */
269         thread_control.cond_broadcast = (void*)GetProcAddress( kernel_dll, "WakeAllConditionVariable" );
270         thread_control.cond_signal = (void*)GetProcAddress( kernel_dll, "WakeConditionVariable" );
271         thread_control.cond_wait = (void*)GetProcAddress( kernel_dll, "SleepConditionVariableCS" );
272     }
273     return x264_pthread_mutex_init( &thread_control.static_mutex, NULL );
274 }
275
276 void x264_win32_threading_destroy( void )
277 {
278     x264_pthread_mutex_destroy( &thread_control.static_mutex );
279     memset( &thread_control, 0, sizeof(x264_win32thread_control_t) );
280 }
281
282 int x264_pthread_num_processors_np( void )
283 {
284     DWORD_PTR system_cpus, process_cpus = 0;
285     int cpus = 0;
286
287     /* GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple processor groups.
288      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
289 #if ARCH_X86_64
290     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
291     HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
292     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
293     if( get_thread_affinity )
294     {
295         /* running on a platform that supports >64 logical cpus */
296         x264_group_affinity_t thread_affinity;
297         if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
298             process_cpus = thread_affinity.mask;
299     }
300 #endif
301     if( !process_cpus )
302         GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus );
303     for( DWORD_PTR bit = 1; bit; bit <<= 1 )
304         cpus += !!(process_cpus & bit);
305
306     return cpus ? cpus : 1;
307 }