void idle_loop(int threadID, SplitPoint* sp);
template <bool Fake>
- bool split(const Position& pos, SearchStack* ss, int ply, Value* alpha, const Value beta, Value* bestValue,
- Depth depth, bool mateThreat, int* moves, MovePicker* mp, int master, bool pvNode);
+ void split(const Position& pos, SearchStack* ss, int ply, Value* alpha, const Value beta, Value* bestValue,
+ Depth depth, bool mateThreat, int* moveCount, MovePicker* mp, int master, bool pvNode);
private:
friend void poll();
// RootMove::operator<() is the comparison function used when
// sorting the moves. A move m1 is considered to be better
// than a move m2 if it has a higher score, or if the moves
- // have equal score but m1 has the higher node count.
+ // have equal score but m1 has the higher beta cut-off count.
bool operator<(const RootMove& m) const {
return score != m.score ? score < m.score : theirBeta <= m.theirBeta;
// better than the second best move.
const Value EasyMoveMargin = Value(0x200);
+ // Maximum number of moves to try before splitting (strong YBWC)
+ const int MaximumSplitMove = 3;
+
// Last seconds noise filtering (LSN)
const bool UseLSNFiltering = true;
const int LSNTime = 4000; // In milliseconds
beta = *betaPtr;
isCheck = pos.is_check();
- // Step 1. Initialize node and poll (omitted at root, but I can see no good reason for this, FIXME)
- // Step 2. Check for aborted search (omitted at root, because we do not initialize root node)
+ // Step 1. Initialize node and poll (omitted at root, init_ss_array() has already initialized root node)
+ // Step 2. Check for aborted search (omitted at root)
// Step 3. Mate distance pruning (omitted at root)
// Step 4. Transposition table lookup (omitted at root)
// At root we do this only to get reference value for child nodes
if (!isCheck)
ss[0].eval = evaluate(pos, ei, 0);
- else
- ss[0].eval = VALUE_NONE; // HACK because we do not initialize root node
// Step 6. Razoring (omitted at root)
// Step 7. Static null move pruning (omitted at root)
continue;
// Value based pruning
- Depth predictedDepth = newDepth - reduction<NonPV>(depth, moveCount); // FIXME We illogically ignore reduction condition depth >= 3*OnePly
+ // We illogically ignore reduction condition depth >= 3*OnePly for predicted depth,
+ // but fixing this made the program slightly weaker.
+ Depth predictedDepth = newDepth - reduction<NonPV>(depth, moveCount);
futilityValueScaled = ss[ply].eval + futility_margin(predictedDepth, moveCount)
+ H.gain(pos.piece_on(move_from(move)), move_to(move));
alpha = value;
update_pv(ss, ply);
+
if (value == value_mate_in(ply + 1))
ss[ply].mateKiller = move;
}
if ( TM.active_threads() > 1
&& bestValue < beta
&& depth >= MinimumSplitDepth
+ && (PvNode || moveCount > MaximumSplitMove * MinimumSplitDepth / depth)
&& Iteration <= 99
&& TM.available_thread_exists(threadID)
&& !AbortSearch
- && !TM.thread_should_stop(threadID)
- && TM.split<FakeSplit>(pos, ss, ply, &alpha, beta, &bestValue, depth,
- mateThreat, &moveCount, &mp, threadID, PvNode))
- break;
+ && !TM.thread_should_stop(threadID))
+ TM.split<FakeSplit>(pos, ss, ply, &alpha, beta, &bestValue, depth,
+ mateThreat, &moveCount, &mp, threadID, PvNode);
}
// Step 19. Check for mate and stalemate
lock_grab(&(sp->lock));
while ( sp->bestValue < sp->beta
- && !TM.thread_should_stop(threadID)
- && (move = sp->mp->get_next_move()) != MOVE_NONE)
+ && (move = sp->mp->get_next_move()) != MOVE_NONE
+ && !TM.thread_should_stop(threadID))
{
- moveCount = ++sp->moves;
+ moveCount = ++sp->moveCount;
lock_release(&(sp->lock));
assert(move_is_ok(move));
{
Value localAlpha = sp->alpha;
value = -search<NonPV>(pos, ss, -(localAlpha+1), -localAlpha, newDepth-ss[sp->ply].reduction, sp->ply+1, true, threadID);
- doFullDepthSearch = (value > localAlpha && !TM.thread_should_stop(threadID));
+ doFullDepthSearch = (value > localAlpha);
}
}
Value localAlpha = sp->alpha;
value = -search<NonPV>(pos, ss, -(localAlpha+1), -localAlpha, newDepth, sp->ply+1, true, threadID);
- if (PvNode && value > localAlpha && value < sp->beta && !TM.thread_should_stop(threadID))
+ if (PvNode && value > localAlpha && value < sp->beta)
value = -search<PV>(pos, ss, -sp->beta, -sp->alpha, newDepth, sp->ply+1, false, threadID);
}
sp->alpha = value;
sp_update_pv(sp->parentSstack, ss, sp->ply);
-
- if (PvNode && value == value_mate_in(sp->ply + 1))
- ss[sp->ply].mateKiller = move;
}
}
}
/* Here we have the lock still grabbed */
sp->slaves[threadID] = 0;
- sp->cpus--;
lock_release(&(sp->lock));
}
threads[threadID].state = THREAD_AVAILABLE;
}
- // If this thread is the master of a split point and all threads have
+ // If this thread is the master of a split point and all slaves have
// finished their work at this split point, return from the idle loop.
- if (sp && sp->cpus == 0)
+ int i = 0;
+ for ( ; sp && i < ActiveThreads && !sp->slaves[i]; i++) {}
+
+ if (i == ActiveThreads)
{
- // Because sp->cpus is decremented under lock protection,
- // be sure sp->lock has been released before to proceed.
+ // Because sp->slaves[] is reset under lock protection,
+ // make sure sp->lock has been released before returning.
lock_grab(&(sp->lock));
lock_release(&(sp->lock));
// split() does the actual work of distributing the work at a node between
- // several threads at PV nodes. If it does not succeed in splitting the
+ // several available threads. If it does not succeed in splitting the
// node (because no idle threads are available, or because we have no unused
- // split point objects), the function immediately returns false. If
- // splitting is possible, a SplitPoint object is initialized with all the
- // data that must be copied to the helper threads (the current position and
- // search stack, alpha, beta, the search depth, etc.), and we tell our
- // helper threads that they have been assigned work. This will cause them
- // to instantly leave their idle loops and call sp_search_pv(). When all
- // threads have returned from sp_search_pv (or, equivalently, when
- // splitPoint->cpus becomes 0), split() returns true.
+ // split point objects), the function immediately returns. If splitting is
+ // possible, a SplitPoint object is initialized with all the data that must be
+ // copied to the helper threads and we tell our helper threads that they have
+ // been assigned work. This will cause them to instantly leave their idle loops
+ // and call sp_search(). When all threads have returned from sp_search(),
+ // split() returns.
template <bool Fake>
- bool ThreadsManager::split(const Position& p, SearchStack* sstck, int ply, Value* alpha,
+ void ThreadsManager::split(const Position& p, SearchStack* sstck, int ply, Value* alpha,
const Value beta, Value* bestValue, Depth depth, bool mateThreat,
- int* moves, MovePicker* mp, int master, bool pvNode) {
+ int* moveCount, MovePicker* mp, int master, bool pvNode) {
assert(p.is_ok());
assert(sstck != NULL);
assert(ply >= 0 && ply < PLY_MAX);
assert(master >= 0 && master < ActiveThreads);
assert(ActiveThreads > 1);
- SplitPoint* splitPoint;
-
lock_grab(&MPLock);
// If no other thread is available to help us, or if we have too many
|| threads[master].activeSplitPoints >= ACTIVE_SPLIT_POINTS_MAX)
{
lock_release(&MPLock);
- return false;
+ return;
}
// Pick the next available split point object from the split point stack
- splitPoint = &SplitPointStack[master][threads[master].activeSplitPoints];
+ SplitPoint* splitPoint = &SplitPointStack[master][threads[master].activeSplitPoints];
// Initialize the split point object
splitPoint->parent = threads[master].splitPoint;
splitPoint->beta = beta;
splitPoint->pvNode = pvNode;
splitPoint->bestValue = *bestValue;
- splitPoint->master = master;
splitPoint->mp = mp;
- splitPoint->moves = *moves;
- splitPoint->cpus = 1;
+ splitPoint->moveCount = *moveCount;
splitPoint->pos = &p;
splitPoint->parentSstack = sstck;
for (int i = 0; i < ActiveThreads; i++)
// If we are here it means we are not available
assert(threads[master].state != THREAD_AVAILABLE);
+ int workersCnt = 1; // At least the master is included
+
// Allocate available threads setting state to THREAD_BOOKED
- for (int i = 0; !Fake && i < ActiveThreads && splitPoint->cpus < MaxThreadsPerSplitPoint; i++)
+ for (int i = 0; !Fake && i < ActiveThreads && workersCnt < MaxThreadsPerSplitPoint; i++)
if (thread_is_available(i, master))
{
threads[i].state = THREAD_BOOKED;
threads[i].splitPoint = splitPoint;
splitPoint->slaves[i] = 1;
- splitPoint->cpus++;
+ workersCnt++;
}
- assert(Fake || splitPoint->cpus > 1);
+ assert(Fake || workersCnt > 1);
// We can release the lock because slave threads are already booked and master is not available
lock_release(&MPLock);
// which it will instantly launch a search, because its state is
// THREAD_WORKISWAITING. We send the split point as a second parameter to the
// idle loop, which means that the main thread will return from the idle
- // loop when all threads have finished their work at this split point
- // (i.e. when splitPoint->cpus == 0).
+ // loop when all threads have finished their work at this split point.
idle_loop(master, splitPoint);
// We have returned from the idle loop, which means that all threads are
threads[master].splitPoint = splitPoint->parent;
lock_release(&MPLock);
- return true;
}