More incremental accumulator updates
authorsyzygy1 <3028851+syzygy1@users.noreply.github.com>
Tue, 20 Oct 2020 19:06:06 +0000 (21:06 +0200)
committerJoost VandeVondele <Joost.VandeVondele@gmail.com>
Thu, 22 Oct 2020 18:50:16 +0000 (20:50 +0200)
This patch was inspired by c065abd which updates the accumulator,
if possible, based on the accumulator of two plies back if
the accumulator of the preceding ply is not available.

With this patch we look back even further in the position history
in an attempt to reduce the number of complete recomputations.
When we find a usable accumulator for the position N plies back,
we also update the accumulator of the position N-1 plies back
because that accumulator is most likely to be helpful later
when evaluating positions in sibling branches.
By not updating all intermediate accumulators immediately,
we avoid doing too much work that is not certain to be useful.
Overall, roughly 2-3% speedup.

This patch makes the code more specific to the net architecture,
changing input features of the net will require additional changes
to the incremental update code as discussed in the PR #3193 and #3191.

Passed STC:
https://tests.stockfishchess.org/tests/view/5f9056712c92c7fe3a8c60d0
LLR: 2.94 (-2.94,2.94) {-0.25,1.25}
Total: 10040 W: 1116 L: 968 D: 7956
Ptnml(0-2): 42, 722, 3365, 828, 63

closes https://github.com/official-stockfish/Stockfish/pull/3193

No functional change.

src/nnue/features/feature_set.h
src/nnue/nnue_accumulator.h
src/nnue/nnue_feature_transformer.h
src/position.cpp

index 26198114a3054e5c34aaa2f74a3ef19b2af40b5f..975824b658cfc9b1b07c4a4602f11c48164ac49b 100644 (file)
@@ -43,90 +43,6 @@ namespace Eval::NNUE::Features {
   template <typename Derived>
   class FeatureSetBase {
 
-   public:
-    // Get a list of indices for active features
-    template <typename IndexListType>
-    static void AppendActiveIndices(
-        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
-
-      for (Color perspective : { WHITE, BLACK }) {
-        Derived::CollectActiveIndices(
-            pos, trigger, perspective, &active[perspective]);
-      }
-    }
-
-    // Get a list of indices for recently changed features
-    template <typename PositionType, typename IndexListType>
-    static void AppendChangedIndices(
-        const PositionType& pos, TriggerEvent trigger,
-        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
-
-      auto collect_for_one = [&](const DirtyPiece& dp) {
-        for (Color perspective : { WHITE, BLACK }) {
-          switch (trigger) {
-            case TriggerEvent::kFriendKingMoved:
-              reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
-              break;
-            default:
-              assert(false);
-              break;
-          }
-          if (reset[perspective]) {
-            Derived::CollectActiveIndices(
-                pos, trigger, perspective, &added[perspective]);
-          } else {
-            Derived::CollectChangedIndices(
-                pos, dp, trigger, perspective,
-                &removed[perspective], &added[perspective]);
-          }
-        }
-      };
-
-      auto collect_for_two = [&](const DirtyPiece& dp1, const DirtyPiece& dp2) {
-        for (Color perspective : { WHITE, BLACK }) {
-          switch (trigger) {
-            case TriggerEvent::kFriendKingMoved:
-              reset[perspective] = dp1.piece[0] == make_piece(perspective, KING)
-                                || dp2.piece[0] == make_piece(perspective, KING);
-              break;
-            default:
-              assert(false);
-              break;
-          }
-          if (reset[perspective]) {
-            Derived::CollectActiveIndices(
-                pos, trigger, perspective, &added[perspective]);
-          } else {
-            Derived::CollectChangedIndices(
-                pos, dp1, trigger, perspective,
-                &removed[perspective], &added[perspective]);
-            Derived::CollectChangedIndices(
-                pos, dp2, trigger, perspective,
-                &removed[perspective], &added[perspective]);
-          }
-        }
-      };
-
-      if (pos.state()->previous->accumulator.computed_accumulation) {
-        const auto& prev_dp = pos.state()->dirtyPiece;
-        if (prev_dp.dirty_num == 0) return;
-        collect_for_one(prev_dp);
-      } else {
-        const auto& prev_dp = pos.state()->previous->dirtyPiece;
-        if (prev_dp.dirty_num == 0) {
-          const auto& prev2_dp = pos.state()->dirtyPiece;
-          if (prev2_dp.dirty_num == 0) return;
-          collect_for_one(prev2_dp);
-        } else {
-          const auto& prev2_dp = pos.state()->dirtyPiece;
-          if (prev2_dp.dirty_num == 0) {
-            collect_for_one(prev_dp);
-          } else {
-            collect_for_two(prev_dp, prev2_dp);
-          }
-        }
-      }
-    }
   };
 
   // Class template that represents the feature set
@@ -146,30 +62,6 @@ namespace Eval::NNUE::Features {
         CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
     static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
 
-   private:
-    // Get a list of indices for active features
-    static void CollectActiveIndices(
-        const Position& pos, const TriggerEvent trigger, const Color perspective,
-        IndexList* const active) {
-      if (FeatureType::kRefreshTrigger == trigger) {
-        FeatureType::AppendActiveIndices(pos, perspective, active);
-      }
-    }
-
-    // Get a list of indices for recently changed features
-    static void CollectChangedIndices(
-        const Position& pos, const DirtyPiece& dp, const TriggerEvent trigger, const Color perspective,
-        IndexList* const removed, IndexList* const added) {
-
-      if (FeatureType::kRefreshTrigger == trigger) {
-        FeatureType::AppendChangedIndices(pos, dp, perspective, removed, added);
-      }
-    }
-
-    // Make the base class and the class template that recursively uses itself a friend
-    friend class FeatureSetBase<FeatureSet>;
-    template <typename... FeatureTypes>
-    friend class FeatureSet;
   };
 
 }  // namespace Eval::NNUE::Features
index 263707102019f8596ab11c3558afc970e8479acb..a357d83526c89ef175e8fc10e6fe1e25b420e4bf 100644 (file)
 
 namespace Eval::NNUE {
 
+  // The accumulator of a StateInfo without parent is set to the INIT state
+  enum AccumulatorState { EMPTY, COMPUTED, INIT };
+
   // Class that holds the result of affine transformation of input features
   struct alignas(kCacheLineSize) Accumulator {
     std::int16_t
         accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-    bool computed_accumulation;
+    AccumulatorState state[2];
   };
 
 }  // namespace Eval::NNUE
index 2f86d20a639b712d6a0bcc51e4d70f5c1c373dd0..f145c848f96fb82349be3d78530f76b97918ab04 100644 (file)
@@ -32,7 +32,7 @@ namespace Eval::NNUE {
   // If vector instructions are enabled, we update and refresh the
   // accumulator tile by tile such that each tile fits in the CPU's
   // vector registers.
-  #define TILING
+  #define VECTOR
 
   #ifdef USE_AVX512
   typedef __m512i vec_t;
@@ -75,7 +75,7 @@ namespace Eval::NNUE {
   static constexpr IndexType kNumRegs = 16;
 
   #else
-  #undef TILING
+  #undef VECTOR
 
   #endif
 
@@ -86,7 +86,7 @@ namespace Eval::NNUE {
     // Number of output dimensions for one side
     static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
 
-    #ifdef TILING
+    #ifdef VECTOR
     static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
     static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
     #endif
@@ -119,32 +119,11 @@ namespace Eval::NNUE {
       return !stream.fail();
     }
 
-    // Proceed with the difference calculation if possible
-    bool UpdateAccumulatorIfPossible(const Position& pos) const {
-
-      const auto now = pos.state();
-      if (now->accumulator.computed_accumulation)
-        return true;
-
-      const auto prev = now->previous;
-      if (prev) {
-        if (prev->accumulator.computed_accumulation) {
-          UpdateAccumulator(pos);
-          return true;
-        } else if (prev->previous && prev->previous->accumulator.computed_accumulation) {
-          UpdateAccumulator(pos);
-          return true;
-        }
-      }
-
-      return false;
-    }
-
     // Convert input features
     void Transform(const Position& pos, OutputType* output) const {
 
-      if (!UpdateAccumulatorIfPossible(pos))
-        RefreshAccumulator(pos);
+      UpdateAccumulator(pos, WHITE);
+      UpdateAccumulator(pos, BLACK);
 
       const auto& accumulation = pos.state()->accumulator.accumulation;
 
@@ -240,153 +219,172 @@ namespace Eval::NNUE {
     }
 
    private:
-    // Calculate cumulative value without using difference calculation
-    void RefreshAccumulator(const Position& pos) const {
-
-      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
-                                       active_indices);
-      for (Color perspective : { WHITE, BLACK }) {
-  #ifdef TILING
-        for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
-          auto biasesTile = reinterpret_cast<const vec_t*>(
-              &biases_[j * kTileHeight]);
-          auto accTile = reinterpret_cast<vec_t*>(
-              &accumulator.accumulation[perspective][i][j * kTileHeight]);
-          vec_t acc[kNumRegs];
-
-          for (unsigned k = 0; k < kNumRegs; ++k)
-            acc[k] = biasesTile[k];
-
-          for (const auto index : active_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index + j * kTileHeight;
-            auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-
-            for (unsigned k = 0; k < kNumRegs; ++k)
-              acc[k] = vec_add_16(acc[k], column[k]);
-          }
+    void UpdateAccumulator(const Position& pos, const Color c) const {
 
-          for (unsigned k = 0; k < kNumRegs; k++)
-            vec_store(&accTile[k], acc[k]);
-        }
-  #else
-        std::memcpy(accumulator.accumulation[perspective][i], biases_,
-            kHalfDimensions * sizeof(BiasType));
-
-        for (const auto index : active_indices[perspective]) {
-          const IndexType offset = kHalfDimensions * index;
-
-          for (IndexType j = 0; j < kHalfDimensions; ++j)
-            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
-        }
+  #ifdef VECTOR
+      // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+      // is defined in the VECTOR code below, once in each branch
+      vec_t acc[kNumRegs];
   #endif
-      }
 
-  #if defined(USE_MMX)
-      _mm_empty();
-  #endif
-
-      accumulator.computed_accumulation = true;
-    }
-
-    // Calculate cumulative value using difference calculation
-    void UpdateAccumulator(const Position& pos) const {
-
-      Accumulator* prev_accumulator;
-      assert(pos.state()->previous);
-      if (pos.state()->previous->accumulator.computed_accumulation) {
-        prev_accumulator = &pos.state()->previous->accumulator;
-      }
-      else {
-        assert(pos.state()->previous->previous);
-        assert(pos.state()->previous->previous->accumulator.computed_accumulation);
-        prev_accumulator = &pos.state()->previous->previous->accumulator;
+      // Look for a usable accumulator of an earlier position. We keep track
+      // of the estimated gain in terms of features to be added/subtracted.
+      StateInfo *st = pos.state(), *next = nullptr;
+      int gain = popcount(pos.pieces()) - 2;
+      while (st->accumulator.state[c] == EMPTY)
+      {
+        auto& dp = st->dirtyPiece;
+        // The first condition tests whether an incremental update is
+        // possible at all: if this side's king has moved, it is not possible.
+        static_assert(std::is_same_v<RawFeatures::SortedTriggerSet,
+              Features::CompileTimeList<Features::TriggerEvent, Features::TriggerEvent::kFriendKingMoved>>,
+              "Current code assumes that only kFriendlyKingMoved refresh trigger is being used.");
+        if (   dp.piece[0] == make_piece(c, KING)
+            || (gain -= dp.dirty_num + 1) < 0)
+          break;
+        next = st;
+        st = st->previous;
       }
 
-      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList removed_indices[2], added_indices[2];
-      bool reset[2] = { false, false };
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
-                                        removed_indices, added_indices, reset);
-
-  #ifdef TILING
-      for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
-        for (Color perspective : { WHITE, BLACK }) {
+      if (st->accumulator.state[c] == COMPUTED)
+      {
+        if (next == nullptr)
+          return;
+
+        // Update incrementally in two steps. First, we update the "next"
+        // accumulator. Then, we update the current accumulator (pos.state()).
+
+        // Gather all features to be updated. This code assumes HalfKP features
+        // only and doesn't support refresh triggers.
+        static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::kFriend>>,
+                                     RawFeatures>);
+        Features::IndexList removed[2], added[2];
+        Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
+            next->dirtyPiece, c, &removed[0], &added[0]);
+        for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
+          Features::HalfKP<Features::Side::kFriend>::AppendChangedIndices(pos,
+              st2->dirtyPiece, c, &removed[1], &added[1]);
+
+        // Mark the accumulators as computed.
+        next->accumulator.state[c] = COMPUTED;
+        pos.state()->accumulator.state[c] = COMPUTED;
+
+        // Now update the accumulators listed in info[], where the last element is a sentinel.
+        StateInfo *info[3] =
+          { next, next == pos.state() ? nullptr : pos.state(), nullptr };
+  #ifdef VECTOR
+        for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
+        {
+          // Load accumulator
           auto accTile = reinterpret_cast<vec_t*>(
-              &accumulator.accumulation[perspective][i][j * kTileHeight]);
-          vec_t acc[kNumRegs];
-
-          if (reset[perspective]) {
-            auto biasesTile = reinterpret_cast<const vec_t*>(
-                &biases_[j * kTileHeight]);
-            for (unsigned k = 0; k < kNumRegs; ++k)
-              acc[k] = biasesTile[k];
-          } else {
-            auto prevAccTile = reinterpret_cast<const vec_t*>(
-                &prev_accumulator->accumulation[perspective][i][j * kTileHeight]);
-            for (IndexType k = 0; k < kNumRegs; ++k)
-              acc[k] = vec_load(&prevAccTile[k]);
+            &st->accumulator.accumulation[c][0][j * kTileHeight]);
+          for (IndexType k = 0; k < kNumRegs; ++k)
+            acc[k] = vec_load(&accTile[k]);
 
+          for (IndexType i = 0; info[i]; ++i)
+          {
             // Difference calculation for the deactivated features
-            for (const auto index : removed_indices[perspective]) {
+            for (const auto index : removed[i])
+            {
               const IndexType offset = kHalfDimensions * index + j * kTileHeight;
               auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-
               for (IndexType k = 0; k < kNumRegs; ++k)
                 acc[k] = vec_sub_16(acc[k], column[k]);
             }
-          }
-          { // Difference calculation for the activated features
-            for (const auto index : added_indices[perspective]) {
+
+            // Difference calculation for the activated features
+            for (const auto index : added[i])
+            {
               const IndexType offset = kHalfDimensions * index + j * kTileHeight;
               auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-
               for (IndexType k = 0; k < kNumRegs; ++k)
                 acc[k] = vec_add_16(acc[k], column[k]);
             }
-          }
 
-          for (IndexType k = 0; k < kNumRegs; ++k)
-            vec_store(&accTile[k], acc[k]);
+            // Store accumulator
+            accTile = reinterpret_cast<vec_t*>(
+              &info[i]->accumulator.accumulation[c][0][j * kTileHeight]);
+            for (IndexType k = 0; k < kNumRegs; ++k)
+              vec_store(&accTile[k], acc[k]);
+          }
         }
-      }
-  #if defined(USE_MMX)
-      _mm_empty();
-  #endif
 
   #else
-      for (Color perspective : { WHITE, BLACK }) {
-
-        if (reset[perspective]) {
-          std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                      kHalfDimensions * sizeof(BiasType));
-        } else {
-          std::memcpy(accumulator.accumulation[perspective][i],
-                      prev_accumulator->accumulation[perspective][i],
-                      kHalfDimensions * sizeof(BiasType));
+        for (IndexType i = 0; info[i]; ++i)
+        {
+          std::memcpy(info[i]->accumulator.accumulation[c][0],
+              st->accumulator.accumulation[c][0],
+              kHalfDimensions * sizeof(BiasType));
+          st = info[i];
+
           // Difference calculation for the deactivated features
-          for (const auto index : removed_indices[perspective]) {
+          for (const auto index : removed[i])
+          {
             const IndexType offset = kHalfDimensions * index;
 
             for (IndexType j = 0; j < kHalfDimensions; ++j)
-              accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
+              st->accumulator.accumulation[c][0][j] -= weights_[offset + j];
           }
-        }
-        { // Difference calculation for the activated features
-          for (const auto index : added_indices[perspective]) {
+
+          // Difference calculation for the activated features
+          for (const auto index : added[i])
+          {
             const IndexType offset = kHalfDimensions * index;
 
             for (IndexType j = 0; j < kHalfDimensions; ++j)
-              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+              st->accumulator.accumulation[c][0][j] += weights_[offset + j];
           }
         }
+  #endif
       }
+      else
+      {
+        // Refresh the accumulator
+        auto& accumulator = pos.state()->accumulator;
+        accumulator.state[c] = COMPUTED;
+        Features::IndexList active;
+        Features::HalfKP<Features::Side::kFriend>::AppendActiveIndices(pos, c, &active);
+
+  #ifdef VECTOR
+        for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j)
+        {
+          auto biasesTile = reinterpret_cast<const vec_t*>(
+              &biases_[j * kTileHeight]);
+          for (IndexType k = 0; k < kNumRegs; ++k)
+            acc[k] = biasesTile[k];
+
+          for (const auto index : active)
+          {
+            const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+            auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
+
+            for (unsigned k = 0; k < kNumRegs; ++k)
+              acc[k] = vec_add_16(acc[k], column[k]);
+          }
+
+          auto accTile = reinterpret_cast<vec_t*>(
+              &accumulator.accumulation[c][0][j * kTileHeight]);
+          for (unsigned k = 0; k < kNumRegs; k++)
+            vec_store(&accTile[k], acc[k]);
+        }
+
+  #else
+        std::memcpy(accumulator.accumulation[c][0], biases_,
+            kHalfDimensions * sizeof(BiasType));
+
+        for (const auto index : active)
+        {
+          const IndexType offset = kHalfDimensions * index;
+
+          for (IndexType j = 0; j < kHalfDimensions; ++j)
+            accumulator.accumulation[c][0][j] += weights_[offset + j];
+        }
   #endif
+      }
 
-      accumulator.computed_accumulation = true;
+  #if defined(USE_MMX)
+      _mm_empty();
+  #endif
     }
 
     using BiasType = std::int16_t;
index e6a760d2c7ac34fb1e3fc381ac7416ca8c25cc3a..b707293dd730655d060e717d35703d3acf9f2322 100644 (file)
@@ -279,6 +279,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
   chess960 = isChess960;
   thisThread = th;
   set_state(st);
+  st->accumulator.state[WHITE] = Eval::NNUE::INIT;
+  st->accumulator.state[BLACK] = Eval::NNUE::INIT;
 
   assert(pos_is_ok());
 
@@ -703,7 +705,8 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   ++st->pliesFromNull;
 
   // Used by NNUE
-  st->accumulator.computed_accumulation = false;
+  st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
+  st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
   auto& dp = st->dirtyPiece;
   dp.dirty_num = 1;
 
@@ -996,16 +999,16 @@ void Position::do_null_move(StateInfo& newSt) {
   assert(!checkers());
   assert(&newSt != st);
 
-  if (Eval::useNNUE)
-  {
-      std::memcpy(&newSt, st, sizeof(StateInfo));
-  }
-  else
-      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
+  std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
 
   newSt.previous = st;
   st = &newSt;
 
+  st->dirtyPiece.dirty_num = 0;
+  st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator()
+  st->accumulator.state[WHITE] = Eval::NNUE::EMPTY;
+  st->accumulator.state[BLACK] = Eval::NNUE::EMPTY;
+
   if (st->epSquare != SQ_NONE)
   {
       st->key ^= Zobrist::enpassant[file_of(st->epSquare)];