Another split() tweak session

[stockfish] / src / search.cpp
diff --git a/src/search.cpp b/src/search.cpp

index 22d5d647447d520accc4c23ec785766e6abfde0b..768b0594e559113806f4fa9799c7eeb0bc71824a 100644 (file)
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -54,6 +54,10 @@ namespace {
    /// Types
    enum NodeType { NonPV, PV };
  
+  // Set to true to force running with one thread.
+  // Used for debugging SMP code.
+  const bool FakeSplit = false;
+
    // ThreadsManager class is used to handle all the threads related stuff in search,
    // init, starting, parking and, the most important, launching a slave thread at a
    // split point are what this class does. All the access to shared thread data is
@@ -83,8 +87,10 @@ namespace {
      void wake_sleeping_threads();
      void put_threads_to_sleep();
      void idle_loop(int threadID, SplitPoint* sp);
-    bool split(const Position& pos, SearchStack* ss, int ply, Value* alpha, const Value beta, Value* bestValue,
-               Depth depth, bool mateThreat, int* moves, MovePicker* mp, int master, bool pvNode);
+
+    template <bool Fake>
+    void split(const Position& pos, SearchStack* ss, int ply, Value* alpha, const Value beta, Value* bestValue,
+               Depth depth, bool mateThreat, int* moveCount, MovePicker* mp, int master, bool pvNode);
  
    private:
      friend void poll();
@@ -282,11 +288,14 @@ namespace {
    Value search(Position& pos, SearchStack ss[], Value alpha, Value beta, Depth depth, int ply, bool allowNullmove, int threadID,  Move excludedMove = MOVE_NONE);
  
    template <NodeType PvNode>
-  Depth extension(const Position& pos, Move m, bool captureOrPromotion, bool moveIsCheck, bool singleEvasion, bool mateThreat, bool* dangerous);
-
    Value qsearch(Position& pos, SearchStack ss[], Value alpha, Value beta, Depth depth, int ply, int threadID);
+
+  template <NodeType PvNode>
    void sp_search(SplitPoint* sp, int threadID);
-  void sp_search_pv(SplitPoint* sp, int threadID);
+
+  template <NodeType PvNode>
+  Depth extension(const Position& pos, Move m, bool captureOrPromotion, bool moveIsCheck, bool singleEvasion, bool mateThreat, bool* dangerous);
+
    void init_node(SearchStack ss[], int ply, int threadID);
    void update_pv(SearchStack ss[], int ply);
    void sp_update_pv(SearchStack* pss, SearchStack ss[], int ply);
@@ -795,8 +804,8 @@ namespace {
      beta = *betaPtr;
      isCheck = pos.is_check();
  
-    // Step 1. Initialize node and poll (omitted at root, but I can see no good reason for this, FIXME)
-    // Step 2. Check for aborted search (omitted at root, because we do not initialize root node)
+    // Step 1. Initialize node and poll (omitted at root, init_ss_array() has already initialized root node)
+    // Step 2. Check for aborted search (omitted at root)
      // Step 3. Mate distance pruning (omitted at root)
      // Step 4. Transposition table lookup (omitted at root)
  
@@ -804,8 +813,6 @@ namespace {
      // At root we do this only to get reference value for child nodes
      if (!isCheck)
          ss[0].eval = evaluate(pos, ei, 0);
-    else
-        ss[0].eval = VALUE_NONE; // HACK because we do not initialize root node
  
      // Step 6. Razoring (omitted at root)
      // Step 7. Static null move pruning (omitted at root)
@@ -1030,6 +1037,7 @@ namespace {
  
      assert(alpha >= -VALUE_INFINITE && alpha <= VALUE_INFINITE);
      assert(beta > alpha && beta <= VALUE_INFINITE);
+    assert(PvNode || alpha == beta - 1);
      assert(ply >= 0 && ply < PLY_MAX);
      assert(threadID >= 0 && threadID < TM.active_threads());
  
@@ -1048,7 +1056,7 @@ namespace {
      oldAlpha = alpha;
  
      if (depth < OnePly)
-        return qsearch(pos, ss, alpha, beta, Depth(0), ply, threadID);
+        return qsearch<PvNode>(pos, ss, alpha, beta, Depth(0), ply, threadID);
  
      // Step 1. Initialize node and poll
      // Polling can abort search.
@@ -1118,7 +1126,7 @@ namespace {
          && !pos.has_pawn_on_7th(pos.side_to_move()))
      {
          Value rbeta = beta - razor_margin(depth);
-        Value v = qsearch(pos, ss, rbeta-1, rbeta, Depth(0), ply, threadID);
+        Value v = qsearch<NonPV>(pos, ss, rbeta-1, rbeta, Depth(0), ply, threadID);
          if (v < rbeta)
              // Logically we should return (v + razor_margin(depth)), but
              // surprisingly this did slightly weaker in tests.
@@ -1275,7 +1283,9 @@ namespace {
                continue;
  
            // Value based pruning
-          Depth predictedDepth = newDepth - reduction<NonPV>(depth, moveCount); // FIXME We illogically ignore reduction condition depth >= 3*OnePly
+          // We illogically ignore reduction condition depth >= 3*OnePly for predicted depth,
+          // but fixing this made program slightly weaker.
+          Depth predictedDepth = newDepth - reduction<NonPV>(depth, moveCount);
            futilityValueScaled =  ss[ply].eval + futility_margin(predictedDepth, moveCount)
                                 + H.gain(pos.piece_on(move_from(move)), move_to(move));
  
@@ -1339,8 +1349,11 @@ namespace {
            bestValue = value;
            if (value > alpha)
            {
-              alpha = value;
+              if (PvNode && value < beta) // This guarantees that always: alpha < beta
+                  alpha = value;
+
                update_pv(ss, ply);
+
                if (value == value_mate_in(ply + 1))
                    ss[ply].mateKiller = move;
            }
@@ -1353,10 +1366,9 @@ namespace {
            && Iteration <= 99
            && TM.available_thread_exists(threadID)
            && !AbortSearch
-          && !TM.thread_should_stop(threadID)
-          && TM.split(pos, ss, ply, &alpha, beta, &bestValue,
-                      depth, mateThreat, &moveCount, &mp, threadID, PvNode))
-          break;
+          && !TM.thread_should_stop(threadID))
+          TM.split<FakeSplit>(pos, ss, ply, &alpha, beta, &bestValue, depth,
+                              mateThreat, &moveCount, &mp, threadID, PvNode);
      }
  
      // Step 19. Check for mate and stalemate
@@ -1399,11 +1411,13 @@ namespace {
    // search function when the remaining depth is zero (or, to be more precise,
    // less than OnePly).
  
+  template <NodeType PvNode>
    Value qsearch(Position& pos, SearchStack ss[], Value alpha, Value beta,
                  Depth depth, int ply, int threadID) {
  
      assert(alpha >= -VALUE_INFINITE && alpha <= VALUE_INFINITE);
      assert(beta >= -VALUE_INFINITE && beta <= VALUE_INFINITE);
+    assert(PvNode || alpha == beta - 1);
      assert(depth <= 0);
      assert(ply >= 0 && ply < PLY_MAX);
      assert(threadID >= 0 && threadID < TM.active_threads());
@@ -1415,7 +1429,6 @@ namespace {
      bool isCheck, enoughMaterial, moveIsCheck, evasionPrunable;
      const TTEntry* tte = NULL;
      int moveCount = 0;
-    bool pvNode = (beta - alpha != 1);
      Value oldAlpha = alpha;
  
      // Initialize, and make an early exit in case of an aborted search,
@@ -1434,7 +1447,7 @@ namespace {
      tte = TT.retrieve(pos.get_key());
      ttMove = (tte ? tte->move() : MOVE_NONE);
  
-    if (!pvNode && tte && ok_to_use_TT(tte, depth, beta, ply))
+    if (!PvNode && tte && ok_to_use_TT(tte, depth, beta, ply))
      {
          assert(tte->type() != VALUE_TYPE_EVAL);
  
@@ -1499,9 +1512,9 @@ namespace {
        ss[ply].currentMove = move;
  
        // Futility pruning
-      if (   enoughMaterial
+      if (   !PvNode
+          &&  enoughMaterial
            && !isCheck
-          && !pvNode
            && !moveIsCheck
            &&  move != ttMove
            && !move_is_promotion(move)
@@ -1527,8 +1540,8 @@ namespace {
                         && !pos.can_castle(pos.side_to_move());
  
        // Don't search moves with negative SEE values
-      if (   (!isCheck || evasionPrunable)
-          && !pvNode
+      if (   !PvNode
+          && (!isCheck || evasionPrunable)
            &&  move != ttMove
            && !move_is_promotion(move)
            &&  pos.see_sign(move) < 0)
@@ -1536,7 +1549,7 @@ namespace {
  
        // Make and search the move
        pos.do_move(move, st, ci, moveIsCheck);
-      value = -qsearch(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
+      value = -qsearch<PvNode>(pos, ss, -beta, -alpha, depth-OnePly, ply+1, threadID);
        pos.undo_move(move);
  
        assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
@@ -1593,6 +1606,7 @@ namespace {
    // also don't need to store anything to the hash table here:  This is taken
    // care of after we return from the split point.
  
+  template <NodeType PvNode>
    void sp_search(SplitPoint* sp, int threadID) {
  
      assert(threadID >= 0 && threadID < TM.active_threads());
@@ -1601,7 +1615,8 @@ namespace {
      StateInfo st;
      Move move;
      Depth ext, newDepth;
-    Value value, futilityValueScaled;
+    Value value;
+    Value futilityValueScaled; // NonPV specific
      bool isCheck, moveIsCheck, captureOrPromotion, dangerous;
      int moveCount;
      value = -VALUE_INFINITE;
@@ -1616,10 +1631,10 @@ namespace {
      lock_grab(&(sp->lock));
  
      while (    sp->bestValue < sp->beta
-           && !TM.thread_should_stop(threadID)
-           && (move = sp->mp->get_next_move()) != MOVE_NONE)
+           && (move = sp->mp->get_next_move()) != MOVE_NONE
+           && !TM.thread_should_stop(threadID))
      {
-      moveCount = ++sp->moves;
+      moveCount = ++sp->moveCount;
        lock_release(&(sp->lock));
  
        assert(move_is_ok(move));
@@ -1628,14 +1643,15 @@ namespace {
        captureOrPromotion = pos.move_is_capture_or_promotion(move);
  
        // Step 11. Decide the new search depth
-      ext = extension<NonPV>(pos, move, captureOrPromotion, moveIsCheck, false, sp->mateThreat, &dangerous);
+      ext = extension<PvNode>(pos, move, captureOrPromotion, moveIsCheck, false, sp->mateThreat, &dangerous);
        newDepth = sp->depth - OnePly + ext;
  
        // Update current move
        ss[sp->ply].currentMove = move;
  
-      // Step 12. Futility pruning
-      if (   !isCheck
+      // Step 12. Futility pruning (is omitted in PV nodes)
+      if (   !PvNode
+          && !isCheck
            && !dangerous
            && !captureOrPromotion
            && !move_is_castle(move))
@@ -1676,135 +1692,24 @@ namespace {
            && !move_is_castle(move)
            && !move_is_killer(move, ss[sp->ply]))
        {
-          ss[sp->ply].reduction = reduction<NonPV>(sp->depth, moveCount);
-          if (ss[sp->ply].reduction)
-          {
-              value = -search<NonPV>(pos, ss, -(sp->alpha+1), -(sp->alpha), newDepth-ss[sp->ply].reduction, sp->ply+1, true, threadID);
-              doFullDepthSearch = (value >= sp->beta && !TM.thread_should_stop(threadID));
-          }
-      }
-
-      // Step 15. Full depth search
-      if (doFullDepthSearch)
-      {
-          ss[sp->ply].reduction = Depth(0);
-          value = -search<NonPV>(pos, ss, -(sp->alpha+1), -(sp->alpha), newDepth, sp->ply+1, true, threadID);
-      }
-
-      // Step 16. Undo move
-      pos.undo_move(move);
-
-      assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
-
-      // Step 17. Check for new best move
-      lock_grab(&(sp->lock));
-
-      if (value > sp->bestValue && !TM.thread_should_stop(threadID))
-      {
-          sp->bestValue = value;
-          if (sp->bestValue >= sp->beta)
-          {
-              sp->stopRequest = true;
-              sp_update_pv(sp->parentSstack, ss, sp->ply);
-          }
-      }
-    }
-
-    /* Here we have the lock still grabbed */
-
-    sp->slaves[threadID] = 0;
-    sp->cpus--;
-
-    lock_release(&(sp->lock));
-  }
-
-
-  // sp_search_pv() is used to search from a PV split point.  This function
-  // is called by each thread working at the split point.  It is similar to
-  // the normal search_pv() function, but simpler.  Because we have already
-  // probed the hash table and searched the first move before splitting, we
-  // don't have to repeat all this work in sp_search_pv().  We also don't
-  // need to store anything to the hash table here: This is taken care of
-  // after we return from the split point.
-
-  void sp_search_pv(SplitPoint* sp, int threadID) {
-
-    assert(threadID >= 0 && threadID < TM.active_threads());
-    assert(TM.active_threads() > 1);
-
-    StateInfo st;
-    Move move;
-    Depth ext, newDepth;
-    Value value;
-    bool moveIsCheck, captureOrPromotion, dangerous;
-    int moveCount;
-    value = -VALUE_INFINITE;
-
-    Position pos(*sp->pos);
-    CheckInfo ci(pos);
-    SearchStack* ss = sp->sstack[threadID];
-
-    // Step 10. Loop through moves
-    // Loop through all legal moves until no moves remain or a beta cutoff occurs
-    lock_grab(&(sp->lock));
-
-    while (    sp->alpha < sp->beta
-           && !TM.thread_should_stop(threadID)
-           && (move = sp->mp->get_next_move()) != MOVE_NONE)
-    {
-      moveCount = ++sp->moves;
-      lock_release(&(sp->lock));
-
-      assert(move_is_ok(move));
-
-      moveIsCheck = pos.move_is_check(move, ci);
-      captureOrPromotion = pos.move_is_capture_or_promotion(move);
-
-      // Step 11. Decide the new search depth
-      ext = extension<PV>(pos, move, captureOrPromotion, moveIsCheck, false, sp->mateThreat, &dangerous);
-      newDepth = sp->depth - OnePly + ext;
-
-      // Update current move
-      ss[sp->ply].currentMove = move;
-
-      // Step 12. Futility pruning (is omitted in PV nodes)
-
-      // Step 13. Make the move
-      pos.do_move(move, st, ci, moveIsCheck);
-
-      // Step 14. Reduced search
-      // if the move fails high will be re-searched at full depth.
-      bool doFullDepthSearch = true;
-
-      if (   !dangerous
-          && !captureOrPromotion
-          && !move_is_castle(move)
-          && !move_is_killer(move, ss[sp->ply]))
-      {
-          ss[sp->ply].reduction = reduction<PV>(sp->depth, moveCount);
+          ss[sp->ply].reduction = reduction<PvNode>(sp->depth, moveCount);
            if (ss[sp->ply].reduction)
            {
                Value localAlpha = sp->alpha;
                value = -search<NonPV>(pos, ss, -(localAlpha+1), -localAlpha, newDepth-ss[sp->ply].reduction, sp->ply+1, true, threadID);
-              doFullDepthSearch = (value > localAlpha && !TM.thread_should_stop(threadID));
+              doFullDepthSearch = (value > localAlpha);
            }
        }
  
        // Step 15. Full depth search
        if (doFullDepthSearch)
        {
-          Value localAlpha = sp->alpha;
            ss[sp->ply].reduction = Depth(0);
+          Value localAlpha = sp->alpha;
            value = -search<NonPV>(pos, ss, -(localAlpha+1), -localAlpha, newDepth, sp->ply+1, true, threadID);
  
-          if (value > localAlpha && value < sp->beta && !TM.thread_should_stop(threadID))
-          {
-              // If another thread has failed high then sp->alpha has been increased
-              // to be higher or equal then beta, if so, avoid to start a PV search.
-              localAlpha = sp->alpha;
-              if (localAlpha < sp->beta)
-                  value = -search<PV>(pos, ss, -sp->beta, -localAlpha, newDepth, sp->ply+1, false, threadID);
-          }
+          if (PvNode && value > localAlpha && value < sp->beta)
+              value = -search<PV>(pos, ss, -sp->beta, -sp->alpha, newDepth, sp->ply+1, false, threadID);
        }
  
        // Step 16. Undo move
@@ -1818,17 +1723,16 @@ namespace {
        if (value > sp->bestValue && !TM.thread_should_stop(threadID))
        {
            sp->bestValue = value;
-          if (value > sp->alpha)
+
+          if (sp->bestValue > sp->alpha)
            {
-              // Ask threads to stop before to modify sp->alpha
-              if (value >= sp->beta)
+              if (!PvNode || value >= sp->beta)
                    sp->stopRequest = true;
  
-              sp->alpha = value;
+              if (PvNode && value < sp->beta) // This guarantees that always: sp->alpha < sp->beta
+                  sp->alpha = value;
  
                sp_update_pv(sp->parentSstack, ss, sp->ply);
-              if (value == value_mate_in(sp->ply + 1))
-                  ss[sp->ply].mateKiller = move;
            }
        }
      }
@@ -1836,12 +1740,10 @@ namespace {
      /* Here we have the lock still grabbed */
  
      sp->slaves[threadID] = 0;
-    sp->cpus--;
  
      lock_release(&(sp->lock));
    }
  
-
    // init_node() is called at the beginning of all the search functions
    // (search() qsearch(), and so on) and initializes the
    // search stack object corresponding to the current node. Once every
@@ -1868,7 +1770,6 @@ namespace {
      ss[ply + 2].initKillers();
    }
  
-
    // update_pv() is called whenever a search returns a value > alpha.
    // It updates the PV in the SearchStack object corresponding to the
    // current node.
@@ -2495,21 +2396,24 @@ namespace {
              threads[threadID].state = THREAD_SEARCHING;
  
              if (threads[threadID].splitPoint->pvNode)
-                sp_search_pv(threads[threadID].splitPoint, threadID);
+                sp_search<PV>(threads[threadID].splitPoint, threadID);
              else
-                sp_search(threads[threadID].splitPoint, threadID);
+                sp_search<NonPV>(threads[threadID].splitPoint, threadID);
  
              assert(threads[threadID].state == THREAD_SEARCHING);
  
              threads[threadID].state = THREAD_AVAILABLE;
          }
  
-        // If this thread is the master of a split point and all threads have
+        // If this thread is the master of a split point and all slaves have
          // finished their work at this split point, return from the idle loop.
-        if (sp && sp->cpus == 0)
+        int i = 0;
+        for ( ; sp && i < ActiveThreads && !sp->slaves[i]; i++) {}
+
+        if (i == ActiveThreads)
          {
-            // Because sp->cpus is decremented under lock protection,
-            // be sure sp->lock has been released before to proceed.
+            // Because sp->slaves[] is reset under lock protection,
+            // be sure sp->lock has been released before to return.
              lock_grab(&(sp->lock));
              lock_release(&(sp->lock));
  
@@ -2684,35 +2588,30 @@ namespace {
  
  
    // split() does the actual work of distributing the work at a node between
-  // several threads at PV nodes. If it does not succeed in splitting the
+  // several available threads. If it does not succeed in splitting the
    // node (because no idle threads are available, or because we have no unused
-  // split point objects), the function immediately returns false. If
-  // splitting is possible, a SplitPoint object is initialized with all the
-  // data that must be copied to the helper threads (the current position and
-  // search stack, alpha, beta, the search depth, etc.), and we tell our
-  // helper threads that they have been assigned work. This will cause them
-  // to instantly leave their idle loops and call sp_search_pv(). When all
-  // threads have returned from sp_search_pv (or, equivalently, when
-  // splitPoint->cpus becomes 0), split() returns true.
-
-  bool ThreadsManager::split(const Position& p, SearchStack* sstck, int ply,
-             Value* alpha, const Value beta, Value* bestValue,
-             Depth depth, bool mateThreat, int* moves, MovePicker* mp, int master, bool pvNode) {
-
+  // split point objects), the function immediately returns. If splitting is
+  // possible, a SplitPoint object is initialized with all the data that must be
+  // copied to the helper threads and we tell our helper threads that they have
+  // been assigned work. This will cause them to instantly leave their idle loops
+  // and call sp_search(). When all threads have returned from sp_search() then
+  // split() returns.
+
+  template <bool Fake>
+  void ThreadsManager::split(const Position& p, SearchStack* sstck, int ply, Value* alpha,
+                             const Value beta, Value* bestValue, Depth depth, bool mateThreat,
+                             int* moveCount, MovePicker* mp, int master, bool pvNode) {
      assert(p.is_ok());
      assert(sstck != NULL);
      assert(ply >= 0 && ply < PLY_MAX);
      assert(*bestValue >= -VALUE_INFINITE);
-    assert(   ( pvNode && *bestValue <= *alpha)
-           || (!pvNode && *bestValue <   beta ));
-    assert(!pvNode || *alpha < beta);
+    assert(*bestValue <= *alpha);
+    assert(*alpha < beta);
      assert(beta <= VALUE_INFINITE);
      assert(depth > Depth(0));
      assert(master >= 0 && master < ActiveThreads);
      assert(ActiveThreads > 1);
  
-    SplitPoint* splitPoint;
-
      lock_grab(&MPLock);
  
      // If no other thread is available to help us, or if we have too many
@@ -2721,11 +2620,11 @@ namespace {
          || threads[master].activeSplitPoints >= ACTIVE_SPLIT_POINTS_MAX)
      {
          lock_release(&MPLock);
-        return false;
+        return;
      }
  
      // Pick the next available split point object from the split point stack
-    splitPoint = &SplitPointStack[master][threads[master].activeSplitPoints];
+    SplitPoint* splitPoint = &SplitPointStack[master][threads[master].activeSplitPoints];
  
      // Initialize the split point object
      splitPoint->parent = threads[master].splitPoint;
@@ -2737,10 +2636,8 @@ namespace {
      splitPoint->beta = beta;
      splitPoint->pvNode = pvNode;
      splitPoint->bestValue = *bestValue;
-    splitPoint->master = master;
      splitPoint->mp = mp;
-    splitPoint->moves = *moves;
-    splitPoint->cpus = 1;
+    splitPoint->moveCount = *moveCount;
      splitPoint->pos = &p;
      splitPoint->parentSstack = sstck;
      for (int i = 0; i < ActiveThreads; i++)
@@ -2752,17 +2649,19 @@ namespace {
      // If we are here it means we are not available
      assert(threads[master].state != THREAD_AVAILABLE);
  
+    int workersCnt = 1; // At least the master is included
+
      // Allocate available threads setting state to THREAD_BOOKED
-    for (int i = 0; i < ActiveThreads && splitPoint->cpus < MaxThreadsPerSplitPoint; i++)
+    for (int i = 0; !Fake && i < ActiveThreads && workersCnt < MaxThreadsPerSplitPoint; i++)
          if (thread_is_available(i, master))
          {
              threads[i].state = THREAD_BOOKED;
              threads[i].splitPoint = splitPoint;
              splitPoint->slaves[i] = 1;
-            splitPoint->cpus++;
+            workersCnt++;
          }
  
-    assert(splitPoint->cpus > 1);
+    assert(Fake || workersCnt > 1);
  
      // We can release the lock because slave threads are already booked and master is not available
      lock_release(&MPLock);
@@ -2783,8 +2682,7 @@ namespace {
      // which it will instantly launch a search, because its state is
      // THREAD_WORKISWAITING.  We send the split point as a second parameter to the
      // idle loop, which means that the main thread will return from the idle
-    // loop when all threads have finished their work at this split point
-    // (i.e. when splitPoint->cpus == 0).
+    // loop when all threads have finished their work at this split point.
      idle_loop(master, splitPoint);
  
      // We have returned from the idle loop, which means that all threads are
@@ -2797,7 +2695,6 @@ namespace {
      threads[master].splitPoint = splitPoint->parent;
  
      lock_release(&MPLock);
-    return true;
    }
  
  
@@ -2867,7 +2764,7 @@ namespace {
          init_ss_array(ss);
          pos.do_move(cur->move, st);
          moves[count].move = cur->move;
-        moves[count].score = -qsearch(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, Depth(0), 1, 0);
+        moves[count].score = -qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, Depth(0), 1, 0);
          moves[count].pv[0] = cur->move;
          moves[count].pv[1] = MOVE_NONE;
          pos.undo_move(cur->move);