From ca677526452823d1fe89543762edb66684e7bdc7 Mon Sep 17 00:00:00 2001 From: syzygy Date: Thu, 20 Oct 2016 21:16:09 +0200 Subject: [PATCH] Per-thread TB hit counters Use a per-thread counter to reduce contention with many cores and endgame positions. Measured around a 1% speed-up on a 12-core machine and 8% on 28 cores with 6-men tablebases, searching on: 7R/1p3k2/2p2P2/3nR1P1/8/3b1P2/7K/r7 b - - 3 38 Also retire the unused set_nodes_searched() and fix a couple of return types and naming conventions. No functional change. --- src/position.h | 5 ----- src/search.cpp | 28 +++++++++++----------------- src/thread.cpp | 17 ++++++++++++++--- src/thread.h | 4 +++- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/position.h b/src/position.h index e74a3c71..9aa4c445 100644 --- a/src/position.h +++ b/src/position.h @@ -148,7 +148,6 @@ public: bool is_chess960() const; Thread* this_thread() const; uint64_t nodes_searched() const; - void set_nodes_searched(uint64_t n); bool is_draw() const; int rule50_count() const; Score psq_score() const; @@ -341,10 +340,6 @@ inline uint64_t Position::nodes_searched() const { return nodes; } -inline void Position::set_nodes_searched(uint64_t n) { - nodes = n; -} - inline bool Position::opposite_bishops() const { return pieceCount[W_BISHOP] == 1 && pieceCount[B_BISHOP] == 1 diff --git a/src/search.cpp b/src/search.cpp index 3d559a0d..e4c903a5 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -46,7 +46,6 @@ namespace Search { namespace Tablebases { int Cardinality; - uint64_t Hits; bool RootInTB; bool UseRule50; Depth ProbeDepth; @@ -672,7 +671,7 @@ namespace { if (found) { - TB::Hits++; + thisThread->tbHits++; int drawScore = TB::UseRule50 ? 
1 : 0; @@ -1519,7 +1518,7 @@ moves_loop: // When in check search starts from here if ( (Limits.use_time_management() && elapsed > Time.maximum() - 10) || (Limits.movetime && elapsed >= Limits.movetime) - || (Limits.nodes && Threads.nodes_searched() >= Limits.nodes)) + || (Limits.nodes && Threads.nodes_searched() >= (uint64_t)Limits.nodes)) Signals.stop = true; } @@ -1536,7 +1535,8 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { const RootMoves& rootMoves = pos.this_thread()->rootMoves; size_t PVIdx = pos.this_thread()->PVIdx; size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); - uint64_t nodes_searched = Threads.nodes_searched(); + uint64_t nodesSearched = Threads.nodes_searched(); + uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); for (size_t i = 0; i < multiPV; ++i) { @@ -1563,13 +1563,13 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { if (!tb && i == PVIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : ""); - ss << " nodes " << nodes_searched - << " nps " << nodes_searched * 1000 / elapsed; + ss << " nodes " << nodesSearched + << " nps " << nodesSearched * 1000 / elapsed; if (elapsed > 1000) // Earlier makes little sense ss << " hashfull " << TT.hashfull(); - ss << " tbhits " << TB::Hits + ss << " tbhits " << tbHits << " time " << elapsed << " pv"; @@ -1612,7 +1612,6 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { void Tablebases::filter_root_moves(Position& pos, Search::RootMoves& rootMoves) { - Hits = 0; RootInTB = false; UseRule50 = Options["Syzygy50MoveRule"]; ProbeDepth = Options["SyzygyProbeDepth"] * ONE_PLY; @@ -1645,13 +1644,8 @@ void Tablebases::filter_root_moves(Position& pos, Search::RootMoves& rootMoves) Cardinality = 0; } - if (RootInTB) - { - Hits = rootMoves.size(); - - if (!UseRule50) - TB::Score = TB::Score > VALUE_DRAW ? VALUE_MATE - MAX_PLY - 1 - : TB::Score < VALUE_DRAW ? 
-VALUE_MATE + MAX_PLY + 1 - : VALUE_DRAW; - } + if (RootInTB && !UseRule50) + TB::Score = TB::Score > VALUE_DRAW ? VALUE_MATE - MAX_PLY - 1 + : TB::Score < VALUE_DRAW ? -VALUE_MATE + MAX_PLY + 1 + : VALUE_DRAW; } diff --git a/src/thread.cpp b/src/thread.cpp index 2923c07f..4d290d7c 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -35,7 +35,7 @@ ThreadPool Threads; // Global object Thread::Thread() { resetCalls = exit = false; - maxPly = callsCnt = 0; + maxPly = callsCnt = tbHits = 0; history.clear(); counterMoves.clear(); idx = Threads.size(); // Start from 0 @@ -158,15 +158,26 @@ void ThreadPool::read_uci_options() { /// ThreadPool::nodes_searched() returns the number of nodes searched -int64_t ThreadPool::nodes_searched() { +uint64_t ThreadPool::nodes_searched() { - int64_t nodes = 0; + uint64_t nodes = 0; for (Thread* th : *this) nodes += th->rootPos.nodes_searched(); return nodes; } +/// ThreadPool::tb_hits() returns the number of TB hits + +uint64_t ThreadPool::tb_hits() { + + uint64_t hits = 0; + for (Thread* th : *this) + hits += th->tbHits; + return hits; +} + + /// ThreadPool::start_thinking() wakes up the main thread sleeping in idle_loop() /// and starts a new search, then returns immediately. diff --git a/src/thread.h b/src/thread.h index 195c3b3b..408aaef7 100644 --- a/src/thread.h +++ b/src/thread.h @@ -62,6 +62,7 @@ public: Endgames endgames; size_t idx, PVIdx; int maxPly, callsCnt; + uint64_t tbHits; Position rootPos; Search::RootMoves rootMoves; @@ -98,7 +99,8 @@ struct ThreadPool : public std::vector { MainThread* main() { return static_cast(at(0)); } void start_thinking(Position&, StateListPtr&, const Search::LimitsType&); void read_uci_options(); - int64_t nodes_searched(); + uint64_t nodes_searched(); + uint64_t tb_hits(); private: StateListPtr setupStates; -- 2.39.2