summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
f8f5dcb)
Balance threads between split points.
There are huge differences between machines, and auto-purging makes it very difficult to measure the improvement in fishtest, but the following was recorded for 16 threads at 15+0.05:
For Bravone (1000 games): 0 ELO
For Glinscott (1000 games): +20 ELO
For bKingUs (1000 games): +50 ELO
For fastGM (1500 games): +50 ELO
The change was a regression for no one and a big improvement for some, so it should be fine to commit.
Also for 8 threads at 15+0.05 we measured a statistically significant improvement:
ELO: 6.19 +-3.9 (95%) LOS: 99.9%
Total: 10325 W: 1824 L: 1640 D: 6861
Finally it was verified that there was no (significant) regression for
4 threads:
ELO: 0.09 +-2.8 (95%) LOS: 52.4%
Total: 19908 W: 3422 L: 3417 D: 13069
2 threads:
ELO: 0.38 +-3.0 (95%) LOS: 60.0%
Total: 19044 W: 3480 L: 3459 D: 12105
1 thread:
ELO: -1.27 +-2.1 (95%) LOS: 12.3%
Total: 40000 W: 7829 L: 7975 D: 24196
Resolves #258
&& Threads.size() >= 2
&& depth >= Threads.minimumSplitDepth
&& ( !thisThread->activeSplitPoint
&& Threads.size() >= 2
&& depth >= Threads.minimumSplitDepth
&& ( !thisThread->activeSplitPoint
- || !thisThread->activeSplitPoint->allSlavesSearching)
+ || !thisThread->activeSplitPoint->allSlavesSearching
+ || ( int(Threads.size()) > MAX_SLAVES_PER_SPLITPOINT
+ && thisThread->activeSplitPoint->slavesCount == MAX_SLAVES_PER_SPLITPOINT))
&& thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD)
{
assert(bestValue > -VALUE_INFINITE && bestValue < beta);
&& thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD)
{
assert(bestValue > -VALUE_INFINITE && bestValue < beta);
// Try to late join to another split point if none of its slaves has
// already finished.
if (Threads.size() > 2)
// Try to late join to another split point if none of its slaves has
// already finished.
if (Threads.size() > 2)
+ {
+ SplitPoint *bestSp = NULL;
+ int bestThread = 0;
+ int bestScore = INT_MAX;
+
for (size_t i = 0; i < Threads.size(); ++i)
{
const int size = Threads[i]->splitPointsSize; // Local copy
for (size_t i = 0; i < Threads.size(); ++i)
{
const int size = Threads[i]->splitPointsSize; // Local copy
if ( sp
&& sp->allSlavesSearching
if ( sp
&& sp->allSlavesSearching
+ && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
&& available_to(Threads[i]))
{
&& available_to(Threads[i]))
{
- // Recheck the conditions under lock protection
- Threads.mutex.lock();
- sp->mutex.lock();
+ int score = sp->spLevel * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1;
- if ( sp->allSlavesSearching
- && available_to(Threads[i]))
- sp->slavesMask.set(idx);
- activeSplitPoint = sp;
- searching = true;
+ bestSp = sp;
+ bestThread = i;
+ bestScore = score;
- sp->mutex.unlock();
- Threads.mutex.unlock();
-
- break; // Just a single attempt
+ if (bestSp)
+ {
+ sp = bestSp;
+
+ // Recheck the conditions under lock protection
+ Threads.mutex.lock();
+ sp->mutex.lock();
+
+ if ( sp->allSlavesSearching
+ && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
+ && available_to(Threads[bestThread]))
+ {
+ sp->slavesMask.set(idx);
+ sp->slavesCount++;
+ activeSplitPoint = sp;
+ searching = true;
+
+ sp->mutex.unlock();
+ Threads.mutex.unlock();
}
// Grab the lock to avoid races with Thread::notify_one()
}
// Grab the lock to avoid races with Thread::notify_one()
sp.masterThread = this;
sp.parentSplitPoint = activeSplitPoint;
sp.masterThread = this;
sp.parentSplitPoint = activeSplitPoint;
+ sp.spLevel = activeSplitPoint ? activeSplitPoint->spLevel + 1 : 0;
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
- while ((slave = Threads.available_slave(this)) != NULL)
+ while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT
+ && (slave = Threads.available_slave(this)) != NULL)
{
sp.slavesMask.set(slave->idx);
{
sp.slavesMask.set(slave->idx);
slave->activeSplitPoint = &sp;
slave->searching = true; // Slave leaves idle_loop()
slave->notify_one(); // Could be sleeping
slave->activeSplitPoint = &sp;
slave->searching = true; // Slave leaves idle_loop()
slave->notify_one(); // Could be sleeping
const int MAX_THREADS = 128;
const int MAX_SPLITPOINTS_PER_THREAD = 8;
const int MAX_THREADS = 128;
const int MAX_SPLITPOINTS_PER_THREAD = 8;
+const int MAX_SLAVES_PER_SPLITPOINT = 4;
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
const Position* pos;
Search::Stack* ss;
Thread* masterThread;
const Position* pos;
Search::Stack* ss;
Thread* masterThread;
Depth depth;
Value beta;
int nodeType;
Depth depth;
Value beta;
int nodeType;
// Shared variable data
Mutex mutex;
std::bitset<MAX_THREADS> slavesMask;
// Shared variable data
Mutex mutex;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;