From: Joost VandeVondele Date: Mon, 1 Jul 2019 12:07:23 +0000 (+0200) Subject: Introduce coordination between searching threads (#2204) X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=217840a6a5a40b516cab59a450a9f36352997240;ds=inline Introduce coordination between searching threads (#2204) this patch improves threading performance by introducing some coordination between threads. The observation is that threading is an area where a lot of Elo can potentially be gained: https://github.com/glinscott/fishtest/wiki/UsefulData#elo-from-threading At STC, 8 threads gain roughly 320 Elo, vs sequential at the same time, however, loses 66 Elo against a single thread with 8x more time. This 66 Elo should be partially recoverable with improved threading. To improve threading, this patch introduces some LMR at nodes that are already being searched by other threads. This requires some coordination between threads, avoiding however synchronisation. To do so, threads leave a trail of breadcrumbs to mark the nodes they are searching. These breadcrumbs are stored in a small hash table, which is only probed at low plies (currently ply < 8). A couple of variants of this patch passed both STC and LTC threaded tests. I picked the simpler, more robust version. I expect that further tests can find further improvements. STC (5+0.05 @ 8 threads): LLR: 2.95 (-2.94,2.94) [0.50,4.50] Total: 26209 W: 5359 L: 5079 D: 15771 http://tests.stockfishchess.org/tests/view/5d0a9b030ebc5925cf0a8e6f LTC (20+0.2 @ 8 threads): LLR: 2.96 (-2.94,2.94) [0.00,3.50] Total: 34832 W: 5650 L: 5382 D: 23800 http://tests.stockfishchess.org/tests/view/5d0c67a20ebc5925cf0aafa7 other passed/tested variants: http://tests.stockfishchess.org/tests/view/5d0a9b030ebc5925cf0a8e6f http://tests.stockfishchess.org/tests/view/5d0c67ca0ebc5925cf0aafa9 http://tests.stockfishchess.org/tests/view/5d0c67810ebc5925cf0aafa3 http://tests.stockfishchess.org/tests/view/5d0958ca0ebc5925cf0a74c6 For the sequential code there is no change in bench, and an earlier version of this patch passed a non-regression test. STC (10+0.1 @ 1 thread) LLR: 2.96 (-2.94,2.94) [-3.00,1.00] Total: 10471 W: 2364 L: 2220 D: 5887 http://tests.stockfishchess.org/tests/view/5d087ee20ebc5925cf0a6381 passed the additional non-regression tests at 2 and 4 threads 20+0.2 TC. The code was rebased on master prior to testing. 2 threads: LLR: 2.95 (-2.94,2.94) [-3.00,1.00] Total: 218863 W: 40927 L: 41153 D: 136783 http://tests.stockfishchess.org/tests/view/5d18c6c30ebc5925cf0b9566 4threads: LLR: 2.96 (-2.94,2.94) [-3.00,1.00] Total: 16839 W: 3017 L: 2889 D: 10933 http://tests.stockfishchess.org/tests/view/5d18c6ea0ebc5925cf0b9568 No functional change. --- diff --git a/src/search.cpp b/src/search.cpp index 37fc2ec4..e008bec1 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -102,6 +102,48 @@ namespace { Move best = MOVE_NONE; }; + // Breadcrumbs are used to mark nodes as being searched by a given thread. + struct Breadcrumb { + std::atomic thread; + std::atomic key; + }; + std::array breadcrumbs; + + // ThreadHolding keeps track of which thread left breadcrumbs at the given node for potential reductions. + // A free node will be marked upon entering the moves loop, and unmarked upon leaving that loop, by the ctor/dtor of this struct. + struct ThreadHolding { + explicit ThreadHolding(Thread* thisThread, Key posKey, int ply) { + location = ply < 8 ? &breadcrumbs[posKey & (breadcrumbs.size() - 1)] : nullptr; + otherThread = false; + owning = false; + if (location) + { + // see if another already marked this location, if not, mark it ourselves. + Thread* tmp = (*location).thread.load(std::memory_order_relaxed); + if (tmp == nullptr) + { + (*location).thread.store(thisThread, std::memory_order_relaxed); + (*location).key.store(posKey, std::memory_order_relaxed); + owning = true; + } + else if ( tmp != thisThread + && (*location).key.load(std::memory_order_relaxed) == posKey) + otherThread = true; + } + } + + ~ThreadHolding() { + if (owning) // free the marked location. + (*location).thread.store(nullptr, std::memory_order_relaxed); + } + + bool marked() { return otherThread; } + + private: + Breadcrumb* location; + bool otherThread, owning; + }; + template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); @@ -847,6 +889,9 @@ moves_loop: // When in check, search starts from here moveCountPruning = false; ttCapture = ttMove && pos.capture_or_promotion(ttMove); + // Mark this node as being searched. + ThreadHolding th(thisThread, posKey, ss->ply); + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) @@ -1019,6 +1064,10 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); + // Reduction if other threads are searching this position. + if (th.marked()) + r += ONE_PLY; + // Decrease reduction if position is or has been on the PV if (ttPv) r -= 2 * ONE_PLY;