From: Marco Costalba Date: Fri, 6 Apr 2012 16:01:41 +0000 (+0100) Subject: Use thread_local compiler specifics X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=b1f57e92cea7bd36126ef8c26928d8991b74baef Use thread_local compiler specifics Much faster than pthread_getspecific() but still a speed regression against the original code. Following are the nps on a bench: Position: 454165 454838 455433; tls: 441046 442767 442767; ms (Win): 450521 447510 451105; ms (pthread): 422115 422115 424276 Signed-off-by: Marco Costalba --- diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 520c33e0..515405ca 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -371,7 +371,7 @@ Value do_evaluate(const Position& pos, Value& margin) { margins[WHITE] = margins[BLACK] = VALUE_ZERO; // Probe the material hash table - ei.mi = Threads.this_thread()->materialTable.probe(pos); + ei.mi = this_thread->materialTable.probe(pos); score += ei.mi->material_value(); // If we have a specialized evaluation function for the current material @@ -383,7 +383,7 @@ Value do_evaluate(const Position& pos, Value& margin) { } // Probe the pawn hash table - ei.pi = Threads.this_thread()->pawnTable.probe(pos); + ei.pi = this_thread->pawnTable.probe(pos); score += ei.pi->pawns_value(); // Initialize attack and king safety bitboards diff --git a/src/platform.h b/src/platform.h index a925d5f7..e002d218 100644 --- a/src/platform.h +++ b/src/platform.h @@ -54,7 +54,6 @@ inline uint64_t time_to_msec(const sys_time_t& t) { return t.tv_sec * 1000LL + t typedef pthread_mutex_t Lock; typedef pthread_cond_t WaitCondition; typedef pthread_t NativeHandle; -typedef pthread_key_t ThreadLocalStorageKey; typedef void*(*pt_start_fn)(void*); # define lock_init(x) pthread_mutex_init(&(x), NULL) @@ -68,10 +67,6 @@ typedef void*(*pt_start_fn)(void*); # define cond_timedwait(x,y,z) pthread_cond_timedwait(&(x),&(y),z) # define thread_create(x,f,t) !pthread_create(&(x),NULL,(pt_start_fn)f,t) # define thread_join(x) 
pthread_join(x, NULL) -# define tls_init(k) pthread_key_create(&k,NULL) -# define tls_get(k) pthread_getspecific(k) -# define tls_set(k,x) pthread_setspecific(k,x) -# define tls_destroy(k) pthread_key_delete(k) #else // Windows and MinGW @@ -96,7 +91,6 @@ inline uint64_t time_to_msec(const sys_time_t& t) { return t.time * 1000LL + t.m typedef CRITICAL_SECTION Lock; typedef HANDLE WaitCondition; typedef HANDLE NativeHandle; -typedef DWORD ThreadLocalStorageKey; # define lock_init(x) InitializeCriticalSection(&(x)) # define lock_grab(x) EnterCriticalSection(&(x)) @@ -109,10 +103,6 @@ typedef DWORD ThreadLocalStorageKey; # define cond_timedwait(x,y,z) { lock_release(y); WaitForSingleObject(x,z); lock_grab(y); } # define thread_create(x,f,t) (x = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)f,t,0,NULL), x != NULL) # define thread_join(x) { WaitForSingleObject(x, INFINITE); CloseHandle(x); } -# define tls_init(k) do { k = TlsAlloc(); } while(0) -# define tls_get(k) TlsGetValue(k) -# define tls_set(k,x) TlsSetValue(k,x) -# define tls_destroy(k) TlsFree(k) #endif diff --git a/src/position.cpp b/src/position.cpp index 62749a91..47b08ec0 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -895,8 +895,8 @@ void Position::do_move(Move m, StateInfo& newSt, const CheckInfo& ci, bool moveI } // Prefetch pawn and material hash tables - prefetch((char*)Threads.this_thread()->pawnTable.entries[st->pawnKey]); - prefetch((char*)Threads.this_thread()->materialTable.entries[st->materialKey]); + prefetch((char*)this_thread->pawnTable.entries[st->pawnKey]); + prefetch((char*)this_thread->materialTable.entries[st->materialKey]); // Update incremental scores st->psqScore += psq_delta(piece, from, to); diff --git a/src/search.cpp b/src/search.cpp index eaba6c6d..ebbff3a4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -332,7 +332,7 @@ finalize: // but if we are pondering or in infinite search, we shouldn't print the best // move before we are told to do so. 
if (!Signals.stop && (Limits.ponder || Limits.infinite)) - Threads.this_thread()->wait_for_stop_or_ponderhit(); + this_thread->wait_for_stop_or_ponderhit(); // Best move could be MOVE_NONE when searching on a stalemate position cout << "bestmove " << move_to_uci(RootMoves[0].pv[0], Chess960) @@ -543,7 +543,7 @@ namespace { bool isPvMove, inCheck, singularExtensionNode, givesCheck; bool captureOrPromotion, dangerous, doFullDepthSearch; int moveCount = 0, playedMoveCount = 0; - Thread* thisThread = Threads.this_thread(); + Thread* thisThread = this_thread; SplitPoint* sp = NULL; refinedValue = bestValue = value = -VALUE_INFINITE; diff --git a/src/thread.cpp b/src/thread.cpp index faa3a571..7b9a4f32 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -28,6 +28,7 @@ using namespace Search; ThreadsManager Threads; // Global object +THREAD_LOCAL Thread* this_thread; // Thread local variable namespace { extern "C" { @@ -36,7 +37,7 @@ namespace { extern "C" { long start_routine(Thread* th) { - Threads.set_this_thread(th); // Save pointer into thread local storage + this_thread = th; // Save pointer into thread local storage (th->*(th->start_fn))(); return 0; } @@ -205,12 +206,11 @@ bool Thread::is_available_to(Thread* master) const { void ThreadsManager::init() { - tls_init(tlsKey); cond_init(sleepCond); lock_init(splitLock); timer = new Thread(&Thread::timer_loop); threads.push_back(new Thread(&Thread::main_loop)); - set_this_thread(main_thread()); // Use main thread's resources + this_thread = main_thread(); // Use main thread's resources read_uci_options(); } @@ -225,7 +225,6 @@ ThreadsManager::~ThreadsManager() { delete timer; lock_destroy(splitLock); cond_destroy(sleepCond); - tls_destroy(tlsKey); } @@ -314,7 +313,7 @@ Value ThreadsManager::split(Position& pos, Stack* ss, Value alpha, Value beta, assert(beta <= VALUE_INFINITE); assert(depth > DEPTH_ZERO); - Thread* master = this_thread(); + Thread* master = this_thread; if (master->splitPointsCnt >= 
MAX_SPLITPOINTS_PER_THREAD) return bestValue; diff --git a/src/thread.h b/src/thread.h index c39c6e69..e03578ee 100644 --- a/src/thread.h +++ b/src/thread.h @@ -120,8 +120,6 @@ public: int min_split_depth() const { return minimumSplitDepth; } int size() const { return (int)threads.size(); } Thread* main_thread() const { return threads[0]; } - Thread* this_thread() const { return (Thread*)tls_get(tlsKey); } - void set_this_thread(Thread* th) const { tls_set(tlsKey, th); } void wake_up() const; void sleep() const; @@ -140,7 +138,6 @@ private: std::vector threads; Thread* timer; - ThreadLocalStorageKey tlsKey; Lock splitLock; WaitCondition sleepCond; Depth minimumSplitDepth; @@ -149,5 +146,6 @@ private: }; extern ThreadsManager Threads; +extern THREAD_LOCAL Thread* this_thread; #endif // !defined(THREAD_H_INCLUDED) diff --git a/src/types.h b/src/types.h index 031cb0ba..2a3b41fa 100644 --- a/src/types.h +++ b/src/types.h @@ -64,6 +64,12 @@ # define FORCE_INLINE inline #endif +#if defined(__GNUC__) +# define THREAD_LOCAL __thread +#else +# define THREAD_LOCAL __declspec(thread) +#endif + #if defined(USE_POPCNT) const bool HasPopCnt = true; #else