From 51e8efdab5f62ebc23e4f7adaea96f619cbca194 Mon Sep 17 00:00:00 2001 From: Marco Costalba Date: Mon, 30 Jan 2012 14:09:20 +0100 Subject: [PATCH] Fix subtle race with slave allocation When allocating a slave we set both is_searching and splitPoint under lock protection. Unfortunatly the order in which the variables are set is not defined. This article was very clarifying: http://software.intel.com/en-us/blogs/2007/11/30/volatile-almost-useless-for-multi-threaded-programming/ So when in idle loop we test for is_searching and then access splitPoint, it could happen that splitPoint is still not updated leading to a possible crash. Fix the race lock protecting splitPoint access. No functional change. Signed-off-by: Marco Costalba --- src/search.cpp | 22 ++++++++++++++----- src/thread.cpp | 59 +++++++++++++++++--------------------------------- src/thread.h | 4 ++-- 3 files changed, 38 insertions(+), 47 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 572a8ac8..49a7b4da 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1864,9 +1864,14 @@ void Thread::idle_loop(SplitPoint* sp_master) { { assert(!do_sleep && !do_exit); - // Copy split point position and search stack and call search() - Stack ss[MAX_PLY_PLUS_2]; + lock_grab(Threads.splitLock); + + assert(is_searching); SplitPoint* sp = splitPoint; + + lock_release(Threads.splitLock); + + Stack ss[MAX_PLY_PLUS_2]; Position pos(*sp->pos, threadID); memcpy(ss, sp->ss - 1, 4 * sizeof(Stack)); @@ -1885,12 +1890,9 @@ void Thread::idle_loop(SplitPoint* sp_master) { assert(is_searching); - // We return from search with lock held + is_searching = false; sp->slavesMask &= ~(1ULL << threadID); sp->nodes += pos.nodes_searched(); - lock_release(sp->lock); - - is_searching = false; // Wake up master thread so to allow it to return from the idle loop in // case we are the last slave of the split point. @@ -1898,8 +1900,16 @@ void Thread::idle_loop(SplitPoint* sp_master) { && threadID != sp->master && !Threads[sp->master].is_searching) Threads[sp->master].wake_up(); + + // After releasing the lock we cannot access anymore any SplitPoint + // related data in a reliably way becuase it could have been released + // under our feet by the sp master. + lock_release(sp->lock); } } + // In helpful master concept a master can help only a sub-tree of its split + // point, and because here is all finished is not possible master is booked. + assert(!is_searching); } diff --git a/src/thread.cpp b/src/thread.cpp index 3b7a10ee..8a8e7199 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -101,10 +101,7 @@ bool Thread::is_available_to(int master) const { // No active split points means that the thread is available as a slave for any // other thread otherwise apply the "helpful master" concept if possible. - if (!sp_count || (splitPoints[sp_count - 1].slavesMask & (1ULL << master))) - return true; - - return false; + return !sp_count || (splitPoints[sp_count - 1].slavesMask & (1ULL << master)); } @@ -151,11 +148,9 @@ void ThreadsManager::set_size(int cnt) { void ThreadsManager::init() { - // Initialize sleep condition and lock used by thread manager cond_init(sleepCond); - lock_init(threadsLock); + lock_init(splitLock); - // Initialize thread's sleep conditions and split point locks for (int i = 0; i <= MAX_THREADS; i++) { lock_init(threads[i].sleepLock); @@ -198,7 +193,6 @@ void ThreadsManager::exit() { thread_join(threads[i].handle); // Wait for thread termination - // Now we can safely destroy associated locks and wait conditions lock_destroy(threads[i].sleepLock); cond_destroy(threads[i].sleepCond); @@ -206,7 +200,7 @@ void ThreadsManager::exit() { lock_destroy(threads[i].splitPoints[j].lock); } - lock_destroy(threadsLock); + lock_destroy(splitLock); cond_destroy(sleepCond); } @@ -248,21 +242,19 @@ Value ThreadsManager::split(Position& pos, Stack* ss, Value alpha, Value beta, assert(pos.thread() >= 0 && pos.thread() < activeThreads); assert(activeThreads > 1); - int i, master = pos.thread(); + int master = pos.thread(); Thread& masterThread = threads[master]; - // If we already have too many active split points, don't split if (masterThread.activeSplitPoints >= MAX_ACTIVE_SPLIT_POINTS) return bestValue; // Pick the next available split point from the split point stack SplitPoint* sp = &masterThread.splitPoints[masterThread.activeSplitPoints]; - // Initialize the split point sp->parent = masterThread.splitPoint; sp->master = master; sp->is_betaCutoff = false; - sp->slavesMask = (1ULL << master); + sp->slavesMask = 1ULL << master; sp->depth = depth; sp->threatMove = threatMove; sp->alpha = alpha; @@ -275,68 +267,57 @@ Value ThreadsManager::split(Position& pos, Stack* ss, Value alpha, Value beta, sp->nodes = 0; sp->ss = ss; - // If we are here it means we are not available assert(masterThread.is_searching); - int workersCnt = 1; // At least the master is included + int slavesCnt = 0; // Try to allocate available threads and ask them to start searching setting // is_searching flag. This must be done under lock protection to avoid concurrent // allocation of the same slave by another master. - lock_grab(threadsLock); + lock_grab(splitLock); lock_grab(sp->lock); // To protect sp->slaves_mask - for (i = 0; !Fake && i < activeThreads; i++) + for (int i = 0; i < activeThreads && !Fake; i++) if (threads[i].is_available_to(master)) { - sp->slavesMask |= (1ULL << i); + sp->slavesMask |= 1ULL << i; threads[i].splitPoint = sp; - - // Allocate the slave and make it exit from idle_loop() - threads[i].is_searching = true; + threads[i].is_searching = true; // Slave leaves idle_loop() if (useSleepingThreads) threads[i].wake_up(); - if (++workersCnt >= maxThreadsPerSplitPoint) + if (++slavesCnt + 1 >= maxThreadsPerSplitPoint) // Master is always included break; } - lock_release(sp->lock); - lock_release(threadsLock); - - // We failed to allocate even one slave, return - if (!Fake && workersCnt == 1) - return bestValue; - masterThread.splitPoint = sp; masterThread.activeSplitPoints++; + lock_release(sp->lock); + lock_release(splitLock); + // Everything is set up. The master thread enters the idle loop, from which // it will instantly launch a search, because its is_searching flag is set. // We pass the split point as a parameter to the idle loop, which means that // the thread will return from the idle loop when all slaves have finished // their work at this split point. - masterThread.idle_loop(sp); - - // In helpful master concept a master can help only a sub-tree of its split - // point, and because here is all finished is not possible master is booked. - assert(!masterThread.is_searching); + if (slavesCnt || Fake) + masterThread.idle_loop(sp); // We have returned from the idle loop, which means that all threads are - // finished. Note that changing state and decreasing activeSplitPoints is done - // under lock protection to avoid a race with Thread::is_available_to(). - lock_grab(threadsLock); + // finished. Note that setting is_searching and decreasing activeSplitPoints is + // done under lock protection to avoid a race with Thread::is_available_to(). + lock_grab(splitLock); lock_grab(sp->lock); // To protect sp->nodes - masterThread.is_searching = true; masterThread.activeSplitPoints--; masterThread.splitPoint = sp->parent; pos.set_nodes_searched(pos.nodes_searched() + sp->nodes); lock_release(sp->lock); - lock_release(threadsLock); + lock_release(splitLock); return sp->bestValue; } diff --git a/src/thread.h b/src/thread.h index beb09e4d..10d12014 100644 --- a/src/thread.h +++ b/src/thread.h @@ -123,12 +123,12 @@ private: friend struct Thread; Thread threads[MAX_THREADS + 1]; // Last one is used as a timer - Lock threadsLock; + Lock splitLock; + WaitCondition sleepCond; Depth minimumSplitDepth; int maxThreadsPerSplitPoint; int activeThreads; bool useSleepingThreads; - WaitCondition sleepCond; }; extern ThreadsManager Threads; -- 2.39.2