From 38112060dc2da351c6dde8f12d0ee5cfaeac5084 Mon Sep 17 00:00:00 2001
From: Marco Costalba
Date: Sun, 22 Feb 2015 14:59:55 +0100
Subject: [PATCH] Use spinlock instead of mutex for Threads and SplitPoint

It is reported to be definitely faster with an increasing number of
threads: we go from +3.5% with 4 threads to +15% with 16 threads.

The only drawback is that now, when testing with more threads than
physically available cores, the speed slows down to a crawl. This is
expected and is similar to what we had when setting the old
sleepingThreads to false.

No functional change.
---
 src/search.cpp | 36 ++++++++++++++++++------------------
 src/thread.cpp | 16 ++++++++--------
 src/thread.h   | 30 +++++++++++++++---------------
 3 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/src/search.cpp b/src/search.cpp
index 89330895..5dacc877 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -765,7 +765,7 @@ moves_loop: // When in check and at SpNode search starts from here
               continue;
 
           moveCount = ++splitPoint->moveCount;
-          splitPoint->mutex.unlock();
+          splitPoint->spinlock.release();
       }
       else
           ++moveCount;
@@ -834,7 +834,7 @@ moves_loop: // When in check and at SpNode search starts from here
                   && moveCount >= FutilityMoveCounts[improving][depth])
               {
                   if (SpNode)
-                      splitPoint->mutex.lock();
+                      splitPoint->spinlock.acquire();
 
                   continue;
               }
@@ -853,7 +853,7 @@ moves_loop: // When in check and at SpNode search starts from here
 
                   if (SpNode)
                   {
-                      splitPoint->mutex.lock();
+                      splitPoint->spinlock.acquire();
                       if (bestValue > splitPoint->bestValue)
                           splitPoint->bestValue = bestValue;
                   }
@@ -865,7 +865,7 @@ moves_loop: // When in check and at SpNode search starts from here
               if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
               {
                   if (SpNode)
-                      splitPoint->mutex.lock();
+                      splitPoint->spinlock.acquire();
 
                   continue;
               }
@@ -965,7 +965,7 @@ moves_loop: // When in check and at SpNode search starts from here
       // Step 18. Check for new best move
       if (SpNode)
       {
-          splitPoint->mutex.lock();
+          splitPoint->spinlock.acquire();
           bestValue = splitPoint->bestValue;
           alpha = splitPoint->alpha;
       }
@@ -1526,13 +1526,13 @@ void Thread::idle_loop() {
       // If this thread has been assigned work, launch a search
       while (searching)
       {
-          Threads.mutex.lock();
+          Threads.spinlock.acquire();
 
           assert(activeSplitPoint);
 
           SplitPoint* sp = activeSplitPoint;
 
-          Threads.mutex.unlock();
+          Threads.spinlock.release();
 
           Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
           Position pos(*sp->pos, this);
@@ -1540,7 +1540,7 @@ void Thread::idle_loop() {
           std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
           ss->splitPoint = sp;
 
-          sp->mutex.lock();
+          sp->spinlock.acquire();
 
           assert(activePosition == nullptr);
 
@@ -1578,7 +1578,7 @@ void Thread::idle_loop() {
           // After releasing the lock we can't access any SplitPoint related data
           // in a safe way because it could have been released under our feet by
           // the sp master.
-          sp->mutex.unlock();
+          sp->spinlock.release();
 
           // Try to late join to another split point if none of its slaves has
           // already finished.
@@ -1593,7 +1593,7 @@ void Thread::idle_loop() {
               if (   sp
                   && sp->allSlavesSearching
                   && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
-                  && available_to(th))
+                  && available_to(sp->master))
               {
                   assert(this != th);
                   assert(!(this_sp && this_sp->slavesMask.none()));
@@ -1618,8 +1618,8 @@ void Thread::idle_loop() {
               sp = bestSp;
 
               // Recheck the conditions under lock protection
-              Threads.mutex.lock();
-              sp->mutex.lock();
+              Threads.spinlock.acquire();
+              sp->spinlock.acquire();
 
               if (   sp->allSlavesSearching
                   && sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
@@ -1630,8 +1630,8 @@ void Thread::idle_loop() {
                   searching = true;
               }
 
-              sp->mutex.unlock();
-              Threads.mutex.unlock();
+              sp->spinlock.release();
+              Threads.spinlock.release();
           }
       }
 
@@ -1687,7 +1687,7 @@ void check_time() {
 
   else if (Limits.nodes)
   {
-      Threads.mutex.lock();
+      Threads.spinlock.acquire();
 
       int64_t nodes = RootPos.nodes_searched();
 
@@ -1698,7 +1698,7 @@ void check_time() {
           {
               SplitPoint& sp = th->splitPoints[i];
 
-              sp.mutex.lock();
+              sp.spinlock.acquire();
 
               nodes += sp.nodes;
 
@@ -1706,10 +1706,10 @@ void check_time() {
                   if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
                       nodes += Threads[idx]->activePosition->nodes_searched();
 
-              sp.mutex.unlock();
+              sp.spinlock.release();
           }
 
-      Threads.mutex.unlock();
+      Threads.spinlock.release();
 
       if (nodes >= Limits.nodes)
           Signals.stop = true;
diff --git a/src/thread.cpp b/src/thread.cpp
index a466df87..1217e3ab 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -165,8 +165,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   // Try to allocate available threads and ask them to start searching setting
   // 'searching' flag. This must be done under lock protection to avoid concurrent
   // allocation of the same slave by another master.
-  Threads.mutex.lock();
-  sp.mutex.lock();
+  Threads.spinlock.acquire();
+  sp.spinlock.acquire();
 
   sp.allSlavesSearching = true; // Must be set under lock protection
   ++splitPointsSize;
@@ -188,8 +188,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   // it will instantly launch a search, because its 'searching' flag is set.
   // The thread will return from the idle loop when all slaves have finished
   // their work at this split point.
-  sp.mutex.unlock();
-  Threads.mutex.unlock();
+  sp.spinlock.release();
+  Threads.spinlock.release();
 
   Thread::idle_loop(); // Force a call to base class idle_loop()
 
@@ -202,8 +202,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   // We have returned from the idle loop, which means that all threads are
   // finished. Note that setting 'searching' and decreasing splitPointsSize must
   // be done under lock protection to avoid a race with Thread::available_to().
-  Threads.mutex.lock();
-  sp.mutex.lock();
+  Threads.spinlock.acquire();
+  sp.spinlock.acquire();
 
   searching = true;
   --splitPointsSize;
@@ -213,8 +213,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
   *bestMove = sp.bestMove;
   *bestValue = sp.bestValue;
 
-  sp.mutex.unlock();
-  Threads.mutex.unlock();
+  sp.spinlock.release();
+  Threads.spinlock.release();
 }
 
 
diff --git a/src/thread.h b/src/thread.h
index 166ea80f..f8e65941 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -39,6 +39,19 @@ const size_t MAX_THREADS = 128;
 const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
 const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
 
 
+/// Spinlock class wraps low level atomic operations to provide spin lock functionality
+
+class Spinlock {
+
+  std::atomic_flag lock;
+
+public:
+  Spinlock() { std::atomic_flag_clear(&lock); }
+  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
+  void release() { lock.clear(std::memory_order_release); }
+};
+
+
 /// SplitPoint struct stores information shared by the threads searching in
 /// parallel below the same split point. It is populated at splitting time.
@@ -58,7 +71,7 @@ struct SplitPoint {
   SplitPoint* parentSplitPoint;
 
   // Shared variable data
-  std::mutex mutex;
+  Spinlock spinlock;
   std::bitset<MAX_THREADS> slavesMask;
   volatile bool allSlavesSearching;
   volatile uint64_t nodes;
@@ -70,19 +83,6 @@ struct SplitPoint {
 };
 
 
-/// Spinlock class wraps low level atomic operations to provide spin lock functionality
-
-class Spinlock {
-
-  std::atomic_flag lock;
-
-public:
-  Spinlock() { std::atomic_flag_clear(&lock); }
-  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
-  void release() { lock.clear(std::memory_order_release); }
-};
-
-
 /// ThreadBase struct is the base of the hierarchy from where we derive all the
 /// specialized thread classes.
 
@@ -162,7 +162,7 @@ struct ThreadPool : public std::vector<Thread*> {
   void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
 
   Depth minimumSplitDepth;
-  std::mutex mutex;
+  Spinlock spinlock;
   std::condition_variable sleepCondition;
   TimerThread* timer;
 };
-- 
2.39.2
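
Not part of the patch itself: below is a minimal, self-contained sketch of the usage pattern the Spinlock relies on, a busy-wait acquire around a very short critical section. The Spinlock body mirrors the class added to thread.h, while the surrounding harness (the counter, worker and pool names and the iteration counts) is purely illustrative and not taken from Stockfish.

// Standalone sketch; compile with a C++11 compiler and -pthread.
#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

class Spinlock {

  std::atomic_flag lock;

public:
  Spinlock() { std::atomic_flag_clear(&lock); }
  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} } // spin until the flag is clear
  void release() { lock.clear(std::memory_order_release); }                  // clear the flag, publishing our writes
};

Spinlock sl;
long counter = 0; // shared data protected by sl

void worker(int iterations) {

  for (int i = 0; i < iterations; ++i)
  {
      sl.acquire();  // the critical section is a handful of instructions, so spinning
      ++counter;     // avoids the sleep/wakeup round trip a mutex would pay...
      sl.release();  // ...as long as every waiter has its own core to spin on
  }
}

int main() {

  const int NUM_THREADS = 4;
  std::vector<std::thread> pool;

  for (int i = 0; i < NUM_THREADS; ++i)
      pool.emplace_back(worker, 100000);

  for (std::thread& t : pool)
      t.join();

  std::cout << counter << std::endl; // prints 400000: no increments are lost
  return 0;
}

The sketch also shows why the commit message warns about oversubscription: a waiting thread keeps burning its core instead of sleeping, so with more threads than physical cores the spinners compete with the lock holder for CPU time and overall speed drops sharply.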