From: Joost VandeVondele Date: Tue, 26 Dec 2017 09:40:42 +0000 (+0100) Subject: Upon changing the number of threads, make sure all threads are bound X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=1c50d8cbf554733c0db6ab423b413d75cc0c1928;hp=2198cd0524574f0d9df8c0ec9aaf14ad8c94402b Upon changing the number of threads, make sure all threads are bound The heuristic to avoid thread binding if less than 8 threads are requested resulted in the first 7 threads not being bound. The branch was verified to yield a roughly 13% speedup by @CoffeeOne on the appropriate hardware and OS, and an earlier version of this patch tested well on his machine: http://tests.stockfishchess.org/tests/view/5a3693480ebc590ccbb8be5a ELO: 9.24 +-4.6 (95%) LOS: 100.0% Total: 5000 W: 634 L: 501 D: 3865 To make sure all threads (including mainThread) are bound as soon as the total number exceeds 7, recreate all threads on a change of thread number. To do this, unify Threads::init, Threads::exit and Threads::set are unified in a single Threads::set function that goes through the needed steps. The code includes several suggestions from @joergoster. Fixes issue #1312 No functional change --- diff --git a/src/main.cpp b/src/main.cpp index 065fe8eb..5e108eeb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,11 +45,11 @@ int main(int argc, char* argv[]) { Pawns::init(); Tablebases::init(Options["SyzygyPath"]); TT.resize(Options["Hash"]); - Threads.init(Options["Threads"]); + Threads.set(Options["Threads"]); Search::clear(); // After threads are up UCI::loop(argc, argv); - Threads.exit(); + Threads.set(0); return 0; } diff --git a/src/misc.cpp b/src/misc.cpp index d815c9c4..1f543ea2 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -293,14 +293,6 @@ int get_group(size_t idx) { void bindThisThread(size_t idx) { - // If OS already scheduled us on a different group than 0 then don't overwrite - // the choice, eventually we are one of many one-threaded processes running on - // some Windows NUMA hardware, for instance in fishtest. To make it simple, - // just check if running threads are below a threshold, in this case all this - // NUMA machinery is not needed. - if (Threads.size() < 8) - return; - // Use only local variables to be thread-safe int group = get_group(idx); diff --git a/src/search.cpp b/src/search.cpp index 1aa3e92e..c72f0afb 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -173,13 +173,7 @@ void Search::clear() { Time.availableNodes = 0; TT.clear(); - - for (Thread* th : Threads) - th->clear(); - - Threads.main()->callsCnt = 0; - Threads.main()->previousScore = VALUE_INFINITE; - Threads.main()->previousTimeReduction = 1; + Threads.clear(); } diff --git a/src/thread.cpp b/src/thread.cpp index 58d693cf..c858817a 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -24,6 +24,7 @@ #include "movegen.h" #include "search.h" #include "thread.h" +#include "uci.h" #include "syzygy/tbprobe.h" ThreadPool Threads; // Global object @@ -35,7 +36,6 @@ ThreadPool Threads; // Global object Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) { wait_for_search_finished(); - clear(); // Zero-init histories (based on std::array) } @@ -92,7 +92,13 @@ void Thread::wait_for_search_finished() { void Thread::idle_loop() { - WinProcGroup::bindThisThread(idx); + // If OS already scheduled us on a different group than 0 then don't overwrite + // the choice, eventually we are one of many one-threaded processes running on + // some Windows NUMA hardware, for instance in fishtest. To make it simple, + // just check if running threads are below a threshold, in this case all this + // NUMA machinery is not needed. + if (Options["Threads"] >= 8) + WinProcGroup::bindThisThread(idx); while (true) { @@ -110,42 +116,40 @@ void Thread::idle_loop() { } } +/// ThreadPool::set() creates/destroys threads to match the requested number. +/// Created and launced threads wil go immediately to sleep in idle_loop. +/// Upon resizing, threads are recreated to allow for binding if necessary. -/// ThreadPool::init() creates and launches the threads that will go -/// immediately to sleep in idle_loop. We cannot use the constructor because -/// Threads is a static object and we need a fully initialized engine at -/// this point due to allocation of Endgames in the Thread constructor. - -void ThreadPool::init(size_t requested) { - - push_back(new MainThread(0)); - set(requested); -} +void ThreadPool::set(size_t requested) { + if (size() > 0) { // destroy any existing thread(s) + main()->wait_for_search_finished(); -/// ThreadPool::exit() terminates threads before the program exits. Cannot be -/// done in the destructor because threads must be terminated before deleting -/// any static object, so before main() returns. + while (size() > 0) + delete back(), pop_back(); + } -void ThreadPool::exit() { + if (requested > 0) { // create new thread(s) + push_back(new MainThread(0)); - main()->wait_for_search_finished(); - set(0); + while (size() < requested) + push_back(new Thread(size())); + clear(); + } } +/// ThreadPool::clear() sets threadPool data to initial values. -/// ThreadPool::set() creates/destroys threads to match the requested number +void ThreadPool::clear() { -void ThreadPool::set(size_t requested) { + for (Thread* th : *this) + th->clear(); - while (size() < requested) - push_back(new Thread(size())); - - while (size() > requested) - delete back(), pop_back(); + main()->callsCnt = 0; + main()->previousScore = VALUE_INFINITE; + main()->previousTimeReduction = 1; } - /// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and /// returns immediately. Main thread will wake up other threads and start the search. @@ -181,7 +185,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, // is shared by threads but is accessed in read-only mode. StateInfo tmp = setupStates->back(); - for (Thread* th : Threads) + for (Thread* th : *this) { th->nodes = th->tbHits = 0; th->rootDepth = th->completedDepth = DEPTH_ZERO; diff --git a/src/thread.h b/src/thread.h index e2009a7d..be486acd 100644 --- a/src/thread.h +++ b/src/thread.h @@ -96,9 +96,8 @@ struct MainThread : public Thread { struct ThreadPool : public std::vector { - void init(size_t); // No constructor and destructor, threads rely on globals that should - void exit(); // be initialized and valid during the whole thread lifetime. void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); + void clear(); void set(size_t); MainThread* main() const { return static_cast(front()); }