Much faster than pthread_getspecific() but still a
speed regression against the original code.
The following are the nps (nodes per second) figures from a bench run:
Position
454165
454838
455433
tls
441046
442767
442767
ms (Win)
450521
447510
451105
ms (pthread)
422115
422115
424276
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
margins[WHITE] = margins[BLACK] = VALUE_ZERO;
// Probe the material hash table
margins[WHITE] = margins[BLACK] = VALUE_ZERO;
// Probe the material hash table
- ei.mi = Threads.this_thread()->materialTable.probe(pos);
+ ei.mi = this_thread->materialTable.probe(pos);
score += ei.mi->material_value();
// If we have a specialized evaluation function for the current material
score += ei.mi->material_value();
// If we have a specialized evaluation function for the current material
}
// Probe the pawn hash table
}
// Probe the pawn hash table
- ei.pi = Threads.this_thread()->pawnTable.probe(pos);
+ ei.pi = this_thread->pawnTable.probe(pos);
score += ei.pi->pawns_value();
// Initialize attack and king safety bitboards
score += ei.pi->pawns_value();
// Initialize attack and king safety bitboards
typedef pthread_mutex_t Lock;
typedef pthread_cond_t WaitCondition;
typedef pthread_t NativeHandle;
typedef pthread_mutex_t Lock;
typedef pthread_cond_t WaitCondition;
typedef pthread_t NativeHandle;
-typedef pthread_key_t ThreadLocalStorageKey;
typedef void*(*pt_start_fn)(void*);
# define lock_init(x) pthread_mutex_init(&(x), NULL)
typedef void*(*pt_start_fn)(void*);
# define lock_init(x) pthread_mutex_init(&(x), NULL)
# define cond_timedwait(x,y,z) pthread_cond_timedwait(&(x),&(y),z)
# define thread_create(x,f,t) !pthread_create(&(x),NULL,(pt_start_fn)f,t)
# define thread_join(x) pthread_join(x, NULL)
# define cond_timedwait(x,y,z) pthread_cond_timedwait(&(x),&(y),z)
# define thread_create(x,f,t) !pthread_create(&(x),NULL,(pt_start_fn)f,t)
# define thread_join(x) pthread_join(x, NULL)
-# define tls_init(k) pthread_key_create(&k,NULL)
-# define tls_get(k) pthread_getspecific(k)
-# define tls_set(k,x) pthread_setspecific(k,x)
-# define tls_destroy(k) pthread_key_delete(k)
#else // Windows and MinGW
#else // Windows and MinGW
typedef CRITICAL_SECTION Lock;
typedef HANDLE WaitCondition;
typedef HANDLE NativeHandle;
typedef CRITICAL_SECTION Lock;
typedef HANDLE WaitCondition;
typedef HANDLE NativeHandle;
-typedef DWORD ThreadLocalStorageKey;
# define lock_init(x) InitializeCriticalSection(&(x))
# define lock_grab(x) EnterCriticalSection(&(x))
# define lock_init(x) InitializeCriticalSection(&(x))
# define lock_grab(x) EnterCriticalSection(&(x))
# define cond_timedwait(x,y,z) { lock_release(y); WaitForSingleObject(x,z); lock_grab(y); }
# define thread_create(x,f,t) (x = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)f,t,0,NULL), x != NULL)
# define thread_join(x) { WaitForSingleObject(x, INFINITE); CloseHandle(x); }
# define cond_timedwait(x,y,z) { lock_release(y); WaitForSingleObject(x,z); lock_grab(y); }
# define thread_create(x,f,t) (x = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)f,t,0,NULL), x != NULL)
# define thread_join(x) { WaitForSingleObject(x, INFINITE); CloseHandle(x); }
-# define tls_init(k) do { k = TlsAlloc(); } while(0)
-# define tls_get(k) TlsGetValue(k)
-# define tls_set(k,x) TlsSetValue(k,x)
-# define tls_destroy(k) TlsFree(k)
}
// Prefetch pawn and material hash tables
}
// Prefetch pawn and material hash tables
- prefetch((char*)Threads.this_thread()->pawnTable.entries[st->pawnKey]);
- prefetch((char*)Threads.this_thread()->materialTable.entries[st->materialKey]);
+ prefetch((char*)this_thread->pawnTable.entries[st->pawnKey]);
+ prefetch((char*)this_thread->materialTable.entries[st->materialKey]);
// Update incremental scores
st->psqScore += psq_delta(piece, from, to);
// Update incremental scores
st->psqScore += psq_delta(piece, from, to);
// but if we are pondering or in infinite search, we shouldn't print the best
// move before we are told to do so.
if (!Signals.stop && (Limits.ponder || Limits.infinite))
// but if we are pondering or in infinite search, we shouldn't print the best
// move before we are told to do so.
if (!Signals.stop && (Limits.ponder || Limits.infinite))
- Threads.this_thread()->wait_for_stop_or_ponderhit();
+ this_thread->wait_for_stop_or_ponderhit();
// Best move could be MOVE_NONE when searching on a stalemate position
cout << "bestmove " << move_to_uci(RootMoves[0].pv[0], Chess960)
// Best move could be MOVE_NONE when searching on a stalemate position
cout << "bestmove " << move_to_uci(RootMoves[0].pv[0], Chess960)
bool isPvMove, inCheck, singularExtensionNode, givesCheck;
bool captureOrPromotion, dangerous, doFullDepthSearch;
int moveCount = 0, playedMoveCount = 0;
bool isPvMove, inCheck, singularExtensionNode, givesCheck;
bool captureOrPromotion, dangerous, doFullDepthSearch;
int moveCount = 0, playedMoveCount = 0;
- Thread* thisThread = Threads.this_thread();
+ Thread* thisThread = this_thread;
SplitPoint* sp = NULL;
refinedValue = bestValue = value = -VALUE_INFINITE;
SplitPoint* sp = NULL;
refinedValue = bestValue = value = -VALUE_INFINITE;
using namespace Search;
ThreadsManager Threads; // Global object
using namespace Search;
ThreadsManager Threads; // Global object
+THREAD_LOCAL Thread* this_thread; // Thread local variable
long start_routine(Thread* th) {
long start_routine(Thread* th) {
- Threads.set_this_thread(th); // Save pointer into thread local storage
+ this_thread = th; // Save pointer into thread local storage
(th->*(th->start_fn))();
return 0;
}
(th->*(th->start_fn))();
return 0;
}
void ThreadsManager::init() {
void ThreadsManager::init() {
cond_init(sleepCond);
lock_init(splitLock);
timer = new Thread(&Thread::timer_loop);
threads.push_back(new Thread(&Thread::main_loop));
cond_init(sleepCond);
lock_init(splitLock);
timer = new Thread(&Thread::timer_loop);
threads.push_back(new Thread(&Thread::main_loop));
- set_this_thread(main_thread()); // Use main thread's resources
+ this_thread = main_thread(); // Use main thread's resources
delete timer;
lock_destroy(splitLock);
cond_destroy(sleepCond);
delete timer;
lock_destroy(splitLock);
cond_destroy(sleepCond);
assert(beta <= VALUE_INFINITE);
assert(depth > DEPTH_ZERO);
assert(beta <= VALUE_INFINITE);
assert(depth > DEPTH_ZERO);
- Thread* master = this_thread();
+ Thread* master = this_thread;
if (master->splitPointsCnt >= MAX_SPLITPOINTS_PER_THREAD)
return bestValue;
if (master->splitPointsCnt >= MAX_SPLITPOINTS_PER_THREAD)
return bestValue;
int min_split_depth() const { return minimumSplitDepth; }
int size() const { return (int)threads.size(); }
Thread* main_thread() const { return threads[0]; }
int min_split_depth() const { return minimumSplitDepth; }
int size() const { return (int)threads.size(); }
Thread* main_thread() const { return threads[0]; }
- Thread* this_thread() const { return (Thread*)tls_get(tlsKey); }
- void set_this_thread(Thread* th) const { tls_set(tlsKey, th); }
void wake_up() const;
void sleep() const;
void wake_up() const;
void sleep() const;
std::vector<Thread*> threads;
Thread* timer;
std::vector<Thread*> threads;
Thread* timer;
- ThreadLocalStorageKey tlsKey;
Lock splitLock;
WaitCondition sleepCond;
Depth minimumSplitDepth;
Lock splitLock;
WaitCondition sleepCond;
Depth minimumSplitDepth;
};
extern ThreadsManager Threads;
};
extern ThreadsManager Threads;
+extern THREAD_LOCAL Thread* this_thread;
#endif // !defined(THREAD_H_INCLUDED)
#endif // !defined(THREAD_H_INCLUDED)
# define FORCE_INLINE inline
#endif
# define FORCE_INLINE inline
#endif
+#if defined(__GNUC__)
+# define THREAD_LOCAL __thread
+#else
+# define THREAD_LOCAL __declspec(thread)
+#endif
+
#if defined(USE_POPCNT)
const bool HasPopCnt = true;
#else
#if defined(USE_POPCNT)
const bool HasPopCnt = true;
#else