It is reported to be definitely faster with increasing
number of threads, we go from a +3.5% with 4 threads
to a +15% with 16 threads.
The only drawback is that now when testing with more
threads than physically available cores, the speed slows
down to a crawl. This is expected and is similar to what
we had when setting the old sleepingThreads to false.
No functional change.
continue;
moveCount = ++splitPoint->moveCount;
continue;
moveCount = ++splitPoint->moveCount;
- splitPoint->mutex.unlock();
+ splitPoint->spinlock.release();
&& moveCount >= FutilityMoveCounts[improving][depth])
{
if (SpNode)
&& moveCount >= FutilityMoveCounts[improving][depth])
{
if (SpNode)
- splitPoint->mutex.lock();
+ splitPoint->spinlock.acquire();
- splitPoint->mutex.lock();
+ splitPoint->spinlock.acquire();
if (bestValue > splitPoint->bestValue)
splitPoint->bestValue = bestValue;
}
if (bestValue > splitPoint->bestValue)
splitPoint->bestValue = bestValue;
}
if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
{
if (SpNode)
if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
{
if (SpNode)
- splitPoint->mutex.lock();
+ splitPoint->spinlock.acquire();
// Step 18. Check for new best move
if (SpNode)
{
// Step 18. Check for new best move
if (SpNode)
{
- splitPoint->mutex.lock();
+ splitPoint->spinlock.acquire();
bestValue = splitPoint->bestValue;
alpha = splitPoint->alpha;
}
bestValue = splitPoint->bestValue;
alpha = splitPoint->alpha;
}
// If this thread has been assigned work, launch a search
while (searching)
{
// If this thread has been assigned work, launch a search
while (searching)
{
+ Threads.spinlock.acquire();
assert(activeSplitPoint);
SplitPoint* sp = activeSplitPoint;
assert(activeSplitPoint);
SplitPoint* sp = activeSplitPoint;
- Threads.mutex.unlock();
+ Threads.spinlock.release();
Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
Position pos(*sp->pos, this);
Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
Position pos(*sp->pos, this);
std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
ss->splitPoint = sp;
std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
ss->splitPoint = sp;
+ sp->spinlock.acquire();
assert(activePosition == nullptr);
assert(activePosition == nullptr);
// After releasing the lock we can't access any SplitPoint related data
// in a safe way because it could have been released under our feet by
// the sp master.
// After releasing the lock we can't access any SplitPoint related data
// in a safe way because it could have been released under our feet by
// the sp master.
+ sp->spinlock.release();
// Try to late join to another split point if none of its slaves has
// already finished.
// Try to late join to another split point if none of its slaves has
// already finished.
if ( sp
&& sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
if ( sp
&& sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
+ && available_to(sp->master))
{
assert(this != th);
assert(!(this_sp && this_sp->slavesMask.none()));
{
assert(this != th);
assert(!(this_sp && this_sp->slavesMask.none()));
sp = bestSp;
// Recheck the conditions under lock protection
sp = bestSp;
// Recheck the conditions under lock protection
- Threads.mutex.lock();
- sp->mutex.lock();
+ Threads.spinlock.acquire();
+ sp->spinlock.acquire();
if ( sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
if ( sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
- sp->mutex.unlock();
- Threads.mutex.unlock();
+ sp->spinlock.release();
+ Threads.spinlock.release();
+ Threads.spinlock.acquire();
int64_t nodes = RootPos.nodes_searched();
int64_t nodes = RootPos.nodes_searched();
{
SplitPoint& sp = th->splitPoints[i];
{
SplitPoint& sp = th->splitPoints[i];
if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
nodes += Threads[idx]->activePosition->nodes_searched();
if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
nodes += Threads[idx]->activePosition->nodes_searched();
- Threads.mutex.unlock();
+ Threads.spinlock.release();
if (nodes >= Limits.nodes)
Signals.stop = true;
if (nodes >= Limits.nodes)
Signals.stop = true;
// Try to allocate available threads and ask them to start searching setting
// 'searching' flag. This must be done under lock protection to avoid concurrent
// allocation of the same slave by another master.
// Try to allocate available threads and ask them to start searching setting
// 'searching' flag. This must be done under lock protection to avoid concurrent
// allocation of the same slave by another master.
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();
sp.allSlavesSearching = true; // Must be set under lock protection
++splitPointsSize;
sp.allSlavesSearching = true; // Must be set under lock protection
++splitPointsSize;
// it will instantly launch a search, because its 'searching' flag is set.
// The thread will return from the idle loop when all slaves have finished
// their work at this split point.
// it will instantly launch a search, because its 'searching' flag is set.
// The thread will return from the idle loop when all slaves have finished
// their work at this split point.
- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();
Thread::idle_loop(); // Force a call to base class idle_loop()
Thread::idle_loop(); // Force a call to base class idle_loop()
// We have returned from the idle loop, which means that all threads are
// finished. Note that setting 'searching' and decreasing splitPointsSize must
// be done under lock protection to avoid a race with Thread::available_to().
// We have returned from the idle loop, which means that all threads are
// finished. Note that setting 'searching' and decreasing splitPointsSize must
// be done under lock protection to avoid a race with Thread::available_to().
- Threads.mutex.lock();
- sp.mutex.lock();
+ Threads.spinlock.acquire();
+ sp.spinlock.acquire();
searching = true;
--splitPointsSize;
searching = true;
--splitPointsSize;
*bestMove = sp.bestMove;
*bestValue = sp.bestValue;
*bestMove = sp.bestMove;
*bestValue = sp.bestValue;
- sp.mutex.unlock();
- Threads.mutex.unlock();
+ sp.spinlock.release();
+ Threads.spinlock.release();
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
+/// Spinlock class wraps low level atomic operations to provide spin lock functionality
+
+class Spinlock {
+
+ std::atomic_flag lock;
+
+public:
+ Spinlock() { std::atomic_flag_clear(&lock); }
+ void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
+ void release() { lock.clear(std::memory_order_release); }
+};
+
+
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.
SplitPoint* parentSplitPoint;
// Shared variable data
SplitPoint* parentSplitPoint;
// Shared variable data
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
-/// Spinlock class wraps low level atomic operations to provide spin lock functionality
-
-class Spinlock {
-
- std::atomic_flag lock;
-
-public:
- Spinlock() { std::atomic_flag_clear(&lock); }
- void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
- void release() { lock.clear(std::memory_order_release); }
-};
-
-
/// ThreadBase struct is the base of the hierarchy from where we derive all the
/// specialized thread classes.
/// ThreadBase struct is the base of the hierarchy from where we derive all the
/// specialized thread classes.
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
std::condition_variable sleepCondition;
TimerThread* timer;
};
std::condition_variable sleepCondition;
TimerThread* timer;
};