X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fthread.cpp;h=30177a3915a84c9245644d779ca6fe0d99bc5a2b;hb=HEAD;hp=b79675f7c09dee12b68382691daab63db4a435b6;hpb=42b48b08e81b55e385e55b3074b7c59d81809a45;p=stockfish diff --git a/src/thread.cpp b/src/thread.cpp index b79675f7..b5d51594 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -1,7 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad + Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,369 +16,395 @@ along with this program. If not, see . */ -#include // For std::count +#include "thread.h" + +#include #include +#include +#include +#include +#include +#include #include "movegen.h" #include "search.h" -#include "thread.h" +#include "syzygy/tbprobe.h" +#include "timeman.h" +#include "types.h" #include "uci.h" - -using namespace Search; - -ThreadPool Threads; // Global object - -extern void check_time(); - -namespace { - - // start_routine() is the C function which is called when a new thread - // is launched. It is a wrapper to the virtual function idle_loop(). - - extern "C" { long start_routine(ThreadBase* th) { th->idle_loop(); return 0; } } - - - // Helpers to launch a thread after creation and joining before delete. Must be - // outside Thread c'tor and d'tor because the object must be fully initialized - // when start_routine (and hence virtual idle_loop) is called and when joining. - - template T* new_thread() { - T* th = new T(); - thread_create(th->handle, start_routine, th); // Will go to sleep - return th; - } - - void delete_thread(ThreadBase* th) { - - th->mutex.lock(); - th->exit = true; // Search must be already finished - th->mutex.unlock(); - - th->notify_one(); - thread_join(th->handle); // Wait for thread termination - delete th; - } - +#include "ucioption.h" + +namespace Stockfish { + +// Constructor launches the thread and waits until it goes to sleep +// in idle_loop(). Note that 'searching' and 'exit' should be already set. +Thread::Thread(Search::SharedState& sharedState, + std::unique_ptr sm, + size_t n, + OptionalThreadToNumaNodeBinder binder) : + idx(n), + nthreads(sharedState.options["Threads"]), + stdThread(&Thread::idle_loop, this) { + + wait_for_search_finished(); + + run_custom_job([this, &binder, &sharedState, &sm, n]() { + // Use the binder to [maybe] bind the threads to a NUMA node before doing + // the Worker allocation. Ideally we would also allocate the SearchManager + // here, but that's minor. + this->numaAccessToken = binder(); + this->worker = + std::make_unique(sharedState, std::move(sm), n, this->numaAccessToken); + }); + + wait_for_search_finished(); } -// ThreadBase::notify_one() wakes up the thread when there is some work to do +// Destructor wakes up the thread in idle_loop() and waits +// for its termination. Thread should be already waiting. +Thread::~Thread() { -void ThreadBase::notify_one() { + assert(!searching); - mutex.lock(); - sleepCondition.notify_one(); - mutex.unlock(); + exit = true; + start_searching(); + stdThread.join(); } +// Wakes up the thread that will start the search +void Thread::start_searching() { + assert(worker != nullptr); + run_custom_job([this]() { worker->start_searching(); }); +} -// ThreadBase::wait_for() set the thread to sleep until 'condition' turns true +// Clears the histories for the thread worker (usually before a new game) +void Thread::clear_worker() { + assert(worker != nullptr); + run_custom_job([this]() { worker->clear(); }); +} -void ThreadBase::wait_for(volatile const bool& condition) { +// Blocks on the condition variable until the thread has finished searching +void Thread::wait_for_search_finished() { - mutex.lock(); - while (!condition) sleepCondition.wait(mutex); - mutex.unlock(); + std::unique_lock lk(mutex); + cv.wait(lk, [&] { return !searching; }); } +// Launching a function in the thread +void Thread::run_custom_job(std::function f) { + { + std::unique_lock lk(mutex); + cv.wait(lk, [&] { return !searching; }); + jobFunc = std::move(f); + searching = true; + } + cv.notify_one(); +} -// Thread c'tor makes some init but does not launch any execution thread that -// will be started only when c'tor returns. +void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); } -Thread::Thread() /* : splitPoints() */ { // Value-initialization bug in MSVC +// Thread gets parked here, blocked on the condition variable +// when the thread has no work to do. - searching = false; - maxPly = splitPointsSize = 0; - activeSplitPoint = NULL; - activePosition = NULL; - idx = Threads.size(); // Starts from 0 -} +void Thread::idle_loop() { + while (true) + { + std::unique_lock lk(mutex); + searching = false; + cv.notify_one(); // Wake up anyone waiting for search finished + cv.wait(lk, [&] { return searching; }); + if (exit) + return; -// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the -// current active split point, or in some ancestor of the split point. + std::function job = std::move(jobFunc); + jobFunc = nullptr; -bool Thread::cutoff_occurred() const { + lk.unlock(); - for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint) - if (sp->cutoff) - return true; + if (job) + job(); + } +} - return false; +Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); } + +uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } +uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } + +// Creates/destroys threads to match the requested number. +// Created and launched threads will immediately go to sleep in idle_loop. +// Upon resizing, threads are recreated to allow for binding if necessary. +void ThreadPool::set(const NumaConfig& numaConfig, + Search::SharedState sharedState, + const Search::SearchManager::UpdateContext& updateContext) { + + if (threads.size() > 0) // destroy any existing thread(s) + { + main_thread()->wait_for_search_finished(); + + threads.clear(); + + boundThreadToNumaNode.clear(); + } + + const size_t requested = sharedState.options["Threads"]; + + if (requested > 0) // create new thread(s) + { + // Binding threads may be problematic when there's multiple NUMA nodes and + // multiple Stockfish instances running. In particular, if each instance + // runs a single thread then they would all be mapped to the first NUMA node. + // This is undesirable, and so the default behaviour (i.e. when the user does not + // change the NumaConfig UCI setting) is to not bind the threads to processors + // unless we know for sure that we span NUMA nodes and replication is required. + const std::string numaPolicy(sharedState.options["NumaPolicy"]); + const bool doBindThreads = [&]() { + if (numaPolicy == "none") + return false; + + if (numaPolicy == "auto") + return numaConfig.suggests_binding_threads(requested); + + // numaPolicy == "system", or explicitly set by the user + return true; + }(); + + boundThreadToNumaNode = doBindThreads + ? numaConfig.distribute_threads_among_numa_nodes(requested) + : std::vector{}; + + while (threads.size() < requested) + { + const size_t threadId = threads.size(); + const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0; + auto manager = threadId == 0 ? std::unique_ptr( + std::make_unique(updateContext)) + : std::make_unique(); + + // When not binding threads we want to force all access to happen + // from the same NUMA node, because in case of NUMA replicated memory + // accesses we don't want to trash cache in case the threads get scheduled + // on the same NUMA node. + auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId) + : OptionalThreadToNumaNodeBinder(numaId); + + threads.emplace_back( + std::make_unique(sharedState, std::move(manager), threadId, binder)); + } + + clear(); + + main_thread()->wait_for_search_finished(); + } } -// Thread::available_to() checks whether the thread is available to help the -// thread 'master' at a split point. An obvious requirement is that thread must -// be idle. With more than two threads, this is not sufficient: If the thread is -// the master of some split point, it is only available as a slave to the slaves -// which are busy searching the split point at the top of slave's split point -// stack (the "helpful master concept" in YBWC terminology). +// Sets threadPool data to initial values +void ThreadPool::clear() { + if (threads.size() == 0) + return; -bool Thread::available_to(const Thread* master) const { + for (auto&& th : threads) + th->clear_worker(); - if (searching) - return false; + for (auto&& th : threads) + th->wait_for_search_finished(); - // Make a local copy to be sure it doesn't become zero under our feet while - // testing next condition and so leading to an out of bounds access. - const int size = splitPointsSize; + // These two affect the time taken on the first move of a game: + main_manager()->bestPreviousAverageScore = VALUE_INFINITE; + main_manager()->previousTimeReduction = 0.85; - // No split points means that the thread is available as a slave for any - // other thread otherwise apply the "helpful master" concept if possible. - return !size || splitPoints[size - 1].slavesMask.test(master->idx); + main_manager()->callsCnt = 0; + main_manager()->bestPreviousScore = VALUE_INFINITE; + main_manager()->originalTimeAdjust = -1; + main_manager()->tm.clear(); } - -// Thread::split() does the actual work of distributing the work at a node between -// several available threads. If it does not succeed in splitting the node -// (because no idle threads are available), the function immediately returns. -// If splitting is possible, a SplitPoint object is initialized with all the -// data that must be copied to the helper threads and then helper threads are -// informed that they have been assigned work. This will cause them to instantly -// leave their idle loops and call search(). When all threads have returned from -// search() then split() returns. - -void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue, - Move* bestMove, Depth depth, int moveCount, - MovePicker* movePicker, int nodeType, bool cutNode) { - - assert(searching); - assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE); - assert(depth >= Threads.minimumSplitDepth); - assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD); - - // Pick and init the next available split point - SplitPoint& sp = splitPoints[splitPointsSize]; - - sp.masterThread = this; - sp.parentSplitPoint = activeSplitPoint; - sp.slavesMask = 0, sp.slavesMask.set(idx); - sp.depth = depth; - sp.bestValue = *bestValue; - sp.bestMove = *bestMove; - sp.alpha = alpha; - sp.beta = beta; - sp.nodeType = nodeType; - sp.cutNode = cutNode; - sp.movePicker = movePicker; - sp.moveCount = moveCount; - sp.pos = &pos; - sp.nodes = 0; - sp.cutoff = false; - sp.ss = ss; - - // Try to allocate available threads and ask them to start searching setting - // 'searching' flag. This must be done under lock protection to avoid concurrent - // allocation of the same slave by another master. - Threads.mutex.lock(); - sp.mutex.lock(); - - sp.allSlavesSearching = true; // Must be set under lock protection - ++splitPointsSize; - activeSplitPoint = &sp; - activePosition = NULL; - - Thread* slave; - - while ((slave = Threads.available_slave(this)) != NULL) - { - sp.slavesMask.set(slave->idx); - slave->activeSplitPoint = &sp; - slave->searching = true; // Slave leaves idle_loop() - slave->notify_one(); // Could be sleeping - } - - // Everything is set up. The master thread enters the idle loop, from which - // it will instantly launch a search, because its 'searching' flag is set. - // The thread will return from the idle loop when all slaves have finished - // their work at this split point. - sp.mutex.unlock(); - Threads.mutex.unlock(); - - Thread::idle_loop(); // Force a call to base class idle_loop() - - // In the helpful master concept, a master can help only a sub-tree of its - // split point and because everything is finished here, it's not possible - // for the master to be booked. - assert(!searching); - assert(!activePosition); - - // We have returned from the idle loop, which means that all threads are - // finished. Note that setting 'searching' and decreasing splitPointsSize must - // be done under lock protection to avoid a race with Thread::available_to(). - Threads.mutex.lock(); - sp.mutex.lock(); - - searching = true; - --splitPointsSize; - activeSplitPoint = sp.parentSplitPoint; - activePosition = &pos; - pos.set_nodes_searched(pos.nodes_searched() + sp.nodes); - *bestMove = sp.bestMove; - *bestValue = sp.bestValue; - - sp.mutex.unlock(); - Threads.mutex.unlock(); +void ThreadPool::run_on_thread(size_t threadId, std::function f) { + assert(threads.size() > threadId); + threads[threadId]->run_custom_job(std::move(f)); } +void ThreadPool::wait_on_thread(size_t threadId) { + assert(threads.size() > threadId); + threads[threadId]->wait_for_search_finished(); +} -// TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds -// and then calls check_time(). When not searching, thread sleeps until it's woken up. - -void TimerThread::idle_loop() { +size_t ThreadPool::num_threads() const { return threads.size(); } - while (!exit) - { - mutex.lock(); - if (!exit) - sleepCondition.wait_for(mutex, run ? Resolution : INT_MAX); +// Wakes up main thread waiting in idle_loop() and returns immediately. +// Main thread will wake up other threads and start the search. +void ThreadPool::start_thinking(const OptionsMap& options, + Position& pos, + StateListPtr& states, + Search::LimitsType limits) { - mutex.unlock(); + main_thread()->wait_for_search_finished(); - if (run) - check_time(); - } -} + main_manager()->stopOnPonderhit = stop = abortedSearch = false; + main_manager()->ponder = limits.ponderMode; + increaseDepth = true; -// MainThread::idle_loop() is where the main thread is parked waiting to be started -// when there is a new search. The main thread will launch all the slave threads. + Search::RootMoves rootMoves; + const auto legalmoves = MoveList(pos); -void MainThread::idle_loop() { + for (const auto& uciMove : limits.searchmoves) + { + auto move = UCIEngine::to_move(pos, uciMove); - while (!exit) - { - mutex.lock(); + if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end()) + rootMoves.emplace_back(move); + } - thinking = false; + if (rootMoves.empty()) + for (const auto& m : legalmoves) + rootMoves.emplace_back(m); - while (!thinking && !exit) - { - Threads.sleepCondition.notify_one(); // Wake up the UI thread if needed - sleepCondition.wait(mutex); - } + Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves); - mutex.unlock(); + // After ownership transfer 'states' becomes empty, so if we stop the search + // and call 'go' again without setting a new position states.get() == nullptr. + assert(states.get() || setupStates.get()); - if (!exit) - { - searching = true; + if (states.get()) + setupStates = std::move(states); // Ownership transfer, states is now empty - Search::think(); + // We use Position::set() to set root position across threads. But there are + // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot + // be deduced from a fen string, so set() clears them and they are set from + // setupStates->back() later. The rootState is per thread, earlier states are + // shared since they are read-only. + for (auto&& th : threads) + { + th->run_custom_job([&]() { + th->worker->limits = limits; + th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly = + th->worker->bestMoveChanges = 0; + th->worker->rootDepth = th->worker->completedDepth = 0; + th->worker->rootMoves = rootMoves; + th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState); + th->worker->rootState = setupStates->back(); + th->worker->tbConfig = tbConfig; + }); + } - assert(searching); + for (auto&& th : threads) + th->wait_for_search_finished(); - searching = false; - } - } + main_thread()->start_searching(); } - -// ThreadPool::init() is called at startup to create and launch requested threads, -// that will go immediately to sleep. We cannot use a c'tor because Threads is a -// static object and we need a fully initialized engine at this point due to -// allocation of Endgames in Thread c'tor. - -void ThreadPool::init() { - - timer = new_thread(); - push_back(new_thread()); - read_uci_options(); +Thread* ThreadPool::get_best_thread() const { + + Thread* bestThread = threads.front().get(); + Value minScore = VALUE_NONE; + + std::unordered_map votes( + 2 * std::min(size(), bestThread->worker->rootMoves.size())); + + // Find the minimum score of all threads + for (auto&& th : threads) + minScore = std::min(minScore, th->worker->rootMoves[0].score); + + // Vote according to score and depth, and select the best thread + auto thread_voting_value = [minScore](Thread* th) { + return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth); + }; + + for (auto&& th : threads) + votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get()); + + for (auto&& th : threads) + { + const auto bestThreadScore = bestThread->worker->rootMoves[0].score; + const auto newThreadScore = th->worker->rootMoves[0].score; + + const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv; + const auto& newThreadPV = th->worker->rootMoves[0].pv; + + const auto bestThreadMoveVote = votes[bestThreadPV[0]]; + const auto newThreadMoveVote = votes[newThreadPV[0]]; + + const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; + const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY; + + const bool bestThreadInProvenLoss = + bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + const bool newThreadInProvenLoss = + newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY; + + // We make sure not to pick a thread with truncated principal variation + const bool betterVotingValue = + thread_voting_value(th.get()) * int(newThreadPV.size() > 2) + > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2); + + if (bestThreadInProvenWin) + { + // Make sure we pick the shortest mate / TB conversion + if (newThreadScore > bestThreadScore) + bestThread = th.get(); + } + else if (bestThreadInProvenLoss) + { + // Make sure we pick the shortest mated / TB conversion + if (newThreadInProvenLoss && newThreadScore < bestThreadScore) + bestThread = th.get(); + } + else if (newThreadInProvenWin || newThreadInProvenLoss + || (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY + && (newThreadMoveVote > bestThreadMoveVote + || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue)))) + bestThread = th.get(); + } + + return bestThread; } -// ThreadPool::exit() terminates the threads before the program exits. Cannot be -// done in d'tor because threads must be terminated before freeing us. - -void ThreadPool::exit() { +// Start non-main threads. +// Will be invoked by main thread after it has started searching. +void ThreadPool::start_searching() { - delete_thread(timer); // As first because check_time() accesses threads data - - for (iterator it = begin(); it != end(); ++it) - delete_thread(*it); + for (auto&& th : threads) + if (th != threads.front()) + th->start_searching(); } -// ThreadPool::read_uci_options() updates internal threads parameters from the -// corresponding UCI options and creates/destroys threads to match the requested -// number. Thread objects are dynamically allocated to avoid creating all possible -// threads in advance (which include pawns and material tables), even if only a -// few are to be used. - -void ThreadPool::read_uci_options() { - - minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY; - size_t requested = Options["Threads"]; - - assert(requested > 0); - - // If zero (default) then set best minimum split depth automatically - if (!minimumSplitDepth) - minimumSplitDepth = requested < 8 ? 4 * ONE_PLY : 7 * ONE_PLY; - - while (size() < requested) - push_back(new_thread()); +// Wait for non-main threads +void ThreadPool::wait_for_search_finished() const { - while (size() > requested) - { - delete_thread(back()); - pop_back(); - } + for (auto&& th : threads) + if (th != threads.front()) + th->wait_for_search_finished(); } +std::vector ThreadPool::get_bound_thread_count_by_numa_node() const { + std::vector counts; -// ThreadPool::available_slave() tries to find an idle thread which is available -// as a slave for the thread 'master'. + if (!boundThreadToNumaNode.empty()) + { + NumaIndex highestNumaNode = 0; + for (NumaIndex n : boundThreadToNumaNode) + if (n > highestNumaNode) + highestNumaNode = n; -Thread* ThreadPool::available_slave(const Thread* master) const { + counts.resize(highestNumaNode + 1, 0); - for (const_iterator it = begin(); it != end(); ++it) - if ((*it)->available_to(master)) - return *it; + for (NumaIndex n : boundThreadToNumaNode) + counts[n] += 1; + } - return NULL; + return counts; } - -// ThreadPool::wait_for_think_finished() waits for main thread to finish the search - -void ThreadPool::wait_for_think_finished() { - - MainThread* th = main(); - th->mutex.lock(); - while (th->thinking) sleepCondition.wait(th->mutex); - th->mutex.unlock(); +void ThreadPool::ensure_network_replicated() { + for (auto&& th : threads) + th->ensure_network_replicated(); } - -// ThreadPool::start_thinking() wakes up the main thread sleeping in -// MainThread::idle_loop() and starts a new search, then returns immediately. - -void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits, - StateStackPtr& states) { - wait_for_think_finished(); - - SearchTime = Time::now(); // As early as possible - - Signals.stopOnPonderhit = Signals.firstRootMove = false; - Signals.stop = Signals.failedLowAtRoot = false; - - RootMoves.clear(); - RootPos = pos; - Limits = limits; - if (states.get()) // If we don't set a new position, preserve current state - { - SetupStates = states; // Ownership transfer here - assert(!states.get()); - } - - for (MoveList it(pos); *it; ++it) - if ( limits.searchmoves.empty() - || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), *it)) - RootMoves.push_back(RootMove(*it)); - - main()->thinking = true; - main()->notify_one(); // Starts main thread -} +} // namespace Stockfish