/*****************************************************************************
* win32thread.c: windows threading
*****************************************************************************
- * Copyright (C) 2010 x264 project
+ * Copyright (C) 2010-2013 x264 project
*
* Authors: Steven Walters <kemuri9@gmail.com>
* Pegasys Inc. <http://www.pegasys-inc.com>
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-/* TODO: work with windows 7 x86_64's (and later systems) awkward
- * way of handling systems with >64 logical processors */
+/* Microsoft's way of supporting systems with >64 logical cpus can be found at
+ * http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */
+
+/* By project consensus, x264 does not need to use more than 64 logical cpus, so
+ * this API neither detects nor uses more than 64 cpus on systems that have them. */
#include "common.h"
#include <process.h>
/* number of times to spin a thread about to block on a locked mutex before retrying and sleeping if still locked */
#define X264_SPIN_COUNT 0
+/* GROUP_AFFINITY struct: declared locally under an x264 name and used as the
+ * out-parameter type when GetThreadGroupAffinity is called through a function pointer
+ * in x264_pthread_num_processors_np */
+typedef struct
+{
+ ULONG_PTR mask; // KAFFINITY = ULONG_PTR; the only field read by x264_pthread_num_processors_np
+ USHORT group; // presumably the processor group number, as in GROUP_AFFINITY -- not read in this file's visible code
+ USHORT reserved[3]; // not read in this file's visible code
+} x264_group_affinity_t;
+
typedef struct
{
/* global mutex for replacing MUTEX_INITIALIZER instances */
static x264_win32thread_control_t thread_control;
/* _beginthreadex requires that the start routine is __stdcall */
-static __stdcall unsigned x264_win32thread_worker( void *arg )
+static unsigned __stdcall x264_win32thread_worker( void *arg ) /* arg is the x264_pthread_t handed to _beginthreadex;
+ * runs its func( arg ) and always returns 0 as the thread exit code */
{
x264_pthread_t *h = arg;
- h->ret = h->func( h->arg );
+ *h->p_ret = h->func( h->arg ); /* store the result through p_ret, which x264_pthread_create points at the ret member */
return 0;
}
{
thread->func = start_routine;
thread->arg = arg;
+ thread->p_ret = &thread->ret;
+ thread->ret = NULL;
thread->handle = (void*)_beginthreadex( NULL, 0, x264_win32thread_worker, thread, 0, NULL );
return !thread->handle;
}
if( ret != WAIT_OBJECT_0 )
return -1;
if( value_ptr )
- *value_ptr = thread.ret;
+ *value_ptr = *thread.p_ret;
CloseHandle( thread.handle );
return 0;
}
{
x264_pthread_mutex_t mtx_broadcast;
x264_pthread_mutex_t mtx_waiter_count;
- int waiter_count;
+ volatile int waiter_count;
HANDLE semaphore;
HANDLE waiters_done;
- int is_broadcast;
+ volatile int is_broadcast;
} x264_win32_cond_t;
int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
/* non-native condition variables */
x264_win32_cond_t *win32_cond = cond->ptr;
+
+ x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
int have_waiter = win32_cond->waiter_count;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
if( have_waiter )
+ {
ReleaseSemaphore( win32_cond->semaphore, 1, NULL );
- return 0;
+ WaitForSingleObject( win32_cond->waiters_done, INFINITE );
+ }
+
+ return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
}
int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
x264_win32_cond_t *win32_cond = cond->ptr;
x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
- x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
-
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
win32_cond->waiter_count++;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
+ x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
// unlock the external mutex
x264_pthread_mutex_unlock( mutex );
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
win32_cond->waiter_count--;
- int last_waiter = !win32_cond->waiter_count && win32_cond->is_broadcast;
+ int last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
if( last_waiter )
memset( &thread_control, 0, sizeof(x264_win32thread_control_t) );
}
-int x264_pthread_num_processors_np()
+int x264_pthread_num_processors_np( void ) /* Counts the cpus available to the caller:
+ * the number of set bits in the affinity mask obtained below, or 1 when that count is zero. */
{
- DWORD_PTR process_cpus, system_cpus;
- if( GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus ) )
+ DWORD_PTR system_cpus, process_cpus = 0; /* process_cpus == 0 means no usable mask has been obtained yet */
+ int cpus = 0;
+
+ /* GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple processor groups.
+ * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
+#if ARCH_X86_64
+ /* find function pointers to API functions specific to x86_64 platforms, if they exist;
+ * GetThreadGroupAffinity is resolved by name at run time and only called when the lookup is non-NULL */
+ HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+ BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
+ if( get_thread_affinity )
{
- int cpus = 0;
- for( DWORD_PTR bit = 1; bit; bit <<= 1 )
- cpus += !!(process_cpus & bit);
- return cpus;
+ /* running on a platform that supports >64 logical cpus */
+ x264_group_affinity_t thread_affinity;
+ if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
+ process_cpus = thread_affinity.mask; /* use the current thread's group affinity mask in place of the process mask */
}
- return 1;
+#endif
+ if( !process_cpus ) /* no group affinity mask obtained (or it was 0): fall back to the process affinity mask */
+ GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus ); /* return value is not checked */
+ for( DWORD_PTR bit = 1; bit; bit <<= 1 ) /* walk every bit position; the loop ends once bit shifts out to 0 */
+ cpus += !!(process_cpus & bit);
+
+ return cpus ? cpus : 1; /* never report fewer than 1 cpu */
}