summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
f8f5dcb)
Balance threads between split points.
There are huge differences between machines, and auto-purging makes it very difficult to measure the improvement in fishtest, but the following was recorded for 16 threads at 15+0.05:
For Bravone (1000 games): 0 ELO
For Glinscott (1000 games): +20 ELO
For bKingUs (1000 games): +50 ELO
For fastGM (1500 games): +50 ELO
The change was a regression for no one and a big improvement for some, so it should be fine to commit.
Also for 8 threads at 15+0.05 we measured a statistically significant improvement:
ELO: 6.19 +-3.9 (95%) LOS: 99.9%
Total: 10325 W: 1824 L: 1640 D: 6861
Finally it was verified that there was no (significant) regression for
4 threads:
ELO: 0.09 +-2.8 (95%) LOS: 52.4%
Total: 19908 W: 3422 L: 3417 D: 13069
2 threads:
ELO: 0.38 +-3.0 (95%) LOS: 60.0%
Total: 19044 W: 3480 L: 3459 D: 12105
1 thread:
ELO: -1.27 +-2.1 (95%) LOS: 12.3%
Total: 40000 W: 7829 L: 7975 D: 24196
Resolves #258
&& Threads.size() >= 2
&& depth >= Threads.minimumSplitDepth
&& ( !thisThread->activeSplitPoint
&& Threads.size() >= 2
&& depth >= Threads.minimumSplitDepth
&& ( !thisThread->activeSplitPoint
- || !thisThread->activeSplitPoint->allSlavesSearching)
+ || !thisThread->activeSplitPoint->allSlavesSearching
+ || ( int(Threads.size()) > MAX_SLAVES_PER_SPLITPOINT
+ && thisThread->activeSplitPoint->slavesCount == MAX_SLAVES_PER_SPLITPOINT))
&& thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD)
{
assert(bestValue > -VALUE_INFINITE && bestValue < beta);
&& thisThread->splitPointsSize < MAX_SPLITPOINTS_PER_THREAD)
{
assert(bestValue > -VALUE_INFINITE && bestValue < beta);
// Try to late join to another split point if none of its slaves has
// already finished.
if (Threads.size() > 2)
// Try to late join to another split point if none of its slaves has
// already finished.
if (Threads.size() > 2)
+ {
+ SplitPoint *bestSp = NULL;
+ int bestThread = 0;
+ int bestScore = INT_MAX;
+
for (size_t i = 0; i < Threads.size(); ++i)
{
const int size = Threads[i]->splitPointsSize; // Local copy
for (size_t i = 0; i < Threads.size(); ++i)
{
const int size = Threads[i]->splitPointsSize; // Local copy
if ( sp
&& sp->allSlavesSearching
if ( sp
&& sp->allSlavesSearching
+ && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
&& available_to(Threads[i]))
{
&& available_to(Threads[i]))
{
- // Recheck the conditions under lock protection
- Threads.mutex.lock();
- sp->mutex.lock();
+ int score = sp->spLevel * 256 * 256 + sp->slavesCount * 256 - sp->depth * 1;
- if ( sp->allSlavesSearching
- && available_to(Threads[i]))
- sp->slavesMask.set(idx);
- activeSplitPoint = sp;
- searching = true;
+ bestSp = sp;
+ bestThread = i;
+ bestScore = score;
- sp->mutex.unlock();
- Threads.mutex.unlock();
-
- break; // Just a single attempt
+ if (bestSp)
+ {
+ sp = bestSp;
+
+ // Recheck the conditions under lock protection
+ Threads.mutex.lock();
+ sp->mutex.lock();
+
+ if ( sp->allSlavesSearching
+ && sp->slavesCount < MAX_SLAVES_PER_SPLITPOINT
+ && available_to(Threads[bestThread]))
+ {
+ sp->slavesMask.set(idx);
+ sp->slavesCount++;
+ activeSplitPoint = sp;
+ searching = true;
+
+ sp->mutex.unlock();
+ Threads.mutex.unlock();
}
// Grab the lock to avoid races with Thread::notify_one()
}
// Grab the lock to avoid races with Thread::notify_one()
sp.masterThread = this;
sp.parentSplitPoint = activeSplitPoint;
sp.masterThread = this;
sp.parentSplitPoint = activeSplitPoint;
+ sp.spLevel = activeSplitPoint ? activeSplitPoint->spLevel + 1 : 0;
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
- while ((slave = Threads.available_slave(this)) != NULL)
+ while ( sp.slavesCount < MAX_SLAVES_PER_SPLITPOINT
+ && (slave = Threads.available_slave(this)) != NULL)
{
sp.slavesMask.set(slave->idx);
{
sp.slavesMask.set(slave->idx);
slave->activeSplitPoint = &sp;
slave->searching = true; // Slave leaves idle_loop()
slave->notify_one(); // Could be sleeping
slave->activeSplitPoint = &sp;
slave->searching = true; // Slave leaves idle_loop()
slave->notify_one(); // Could be sleeping
const int MAX_THREADS = 128;
const int MAX_SPLITPOINTS_PER_THREAD = 8;
const int MAX_THREADS = 128;
const int MAX_SPLITPOINTS_PER_THREAD = 8;
+const int MAX_SLAVES_PER_SPLITPOINT = 4;
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
const Position* pos;
Search::Stack* ss;
Thread* masterThread;
const Position* pos;
Search::Stack* ss;
Thread* masterThread;
Depth depth;
Value beta;
int nodeType;
Depth depth;
Value beta;
int nodeType;
// Shared variable data
Mutex mutex;
std::bitset<MAX_THREADS> slavesMask;
// Shared variable data
Mutex mutex;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
volatile Value alpha;