Merge remote-tracking branch 'upstream/master' into HEAD

author Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 14 Dec 2020 23:33:25 +0000 (00:33 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 14 Dec 2020 23:33:25 +0000 (00:33 +0100)
diff --git a/src/evaluate.cpp b/src/evaluate.cpp

index a1b04316e79b52f3ffb474596948ef1617123cf2..c507aa06a85f8a2bf4c84e4e038f3d0cdf72fc86 100644 (file)
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -594,7 +594,7 @@ namespace {
      int kingFlankDefense = popcount(b3);
  
      kingDanger +=        kingAttackersCount[Them] * kingAttackersWeight[Them] // (~10 Elo)
-                 + 185 * popcount(kingRing[Us] & weak)                        // (~15 Elo)
+                 + 183 * popcount(kingRing[Us] & weak)                        // (~15 Elo)
                   + 148 * popcount(unsafeChecks)                               // (~4 Elo)
                   +  98 * popcount(pos.blockers_for_king(Us))                  // (~2 Elo)
                   +  69 * kingAttacksCount[Them]                               // (~0.5 Elo)
@@ -844,6 +844,8 @@ namespace {
      behind |= shift<Down>(behind);
      behind |= shift<Down+Down>(behind);
  
+    // Compute the space score from the number of safe squares and a weight: the number of
+    // our pieces, increased by the total number of blocked pawns in the position (capped at 9).
      int bonus = popcount(safe) + popcount(behind & safe & ~attackedBy[Them][ALL_PIECES]);
      int weight = pos.count<ALL_PIECES>(Us) - 3 + std::min(pe->blocked_count(), 9);
      Score score = make_score(bonus * weight * weight / 16, 0);
@@ -905,24 +907,36 @@ namespace {
      {
          if (pos.opposite_bishops())
          {
+            // For pure opposite-colored bishop endgames, use a scale factor
+            // based on the number of passed pawns of the strong side.
              if (   pos.non_pawn_material(WHITE) == BishopValueMg
                  && pos.non_pawn_material(BLACK) == BishopValueMg)
                  sf = 18 + 4 * popcount(pe->passed_pawns(strongSide));
+            // For all other opposite-colored bishop endgames, use a scale factor
+            // based on the total number of pieces of the strong side.
              else
                  sf = 22 + 3 * pos.count<ALL_PIECES>(strongSide);
          }
+        // For rook endgames where the strong side has no overwhelming pawn-count advantage,
+        // its pawns are on one flank, and the weak side's king protects at least one of
+        // its own pawns, use a lower scale factor.
          else if (  pos.non_pawn_material(WHITE) == RookValueMg
                  && pos.non_pawn_material(BLACK) == RookValueMg
                  && pos.count<PAWN>(strongSide) - pos.count<PAWN>(~strongSide) <= 1
                  && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN))
                  && (attacks_bb<KING>(pos.square<KING>(~strongSide)) & pos.pieces(~strongSide, PAWN)))
              sf = 36;
+        // For queen vs. no-queen endgames, use a scale factor based on the
+        // number of minor pieces of the side that does not have the queen.
          else if (pos.count<QUEEN>() == 1)
              sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
                                                          : pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE));
+        // In every other case use scale factor based on
+        // the number of pawns of the strong side reduced if pawns are on a single flank.
          else
              sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide)) - 4 * !pawnsOnBothFlanks;
  
+        // Reduce scale factor in case of pawns being on a single flank
          sf -= 4 * !pawnsOnBothFlanks;
      }
  
@@ -1046,6 +1060,8 @@ Value Eval::evaluate(const Position& pos) {
        bool  largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
        bool  classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
  
+      // Use classical evaluation for really low piece endgames.
+      // The most critical case is a bishop + A/H file pawn vs naked king draw.
        bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
  
        v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
diff --git a/src/material.cpp b/src/material.cpp

index 870a5e112cb8ed25536ec10a7165f8031c57702f..f77972e352c0d9395054750bca0ebe93aef3e73c 100644 (file)
--- a/src/material.cpp
+++ b/src/material.cpp
@@ -25,31 +25,34 @@
  using namespace std;
  
  namespace {
+  #define S(mg, eg) make_score(mg, eg)
  
    // Polynomial material imbalance parameters
  
-  constexpr int QuadraticOurs[][PIECE_TYPE_NB] = {
+  constexpr Score QuadraticOurs[][PIECE_TYPE_NB] = {
      //            OUR PIECES
      // pair pawn knight bishop rook queen
-    {1438                               }, // Bishop pair
-    {  40,   38                         }, // Pawn
-    {  32,  255, -62                    }, // Knight      OUR PIECES
-    {   0,  104,   4,    0              }, // Bishop
-    { -26,   -2,  47,   105,  -208      }, // Rook
-    {-189,   24, 117,   133,  -134, -6  }  // Queen
+    {S(1419, 1455)                                                                  }, // Bishop pair
+    {S( 101,   28), S( 37,  39)                                                     }, // Pawn
+    {S(  57,   64), S(249, 187), S(-49, -62)                                        }, // Knight      OUR PIECES
+    {S(   0,    0), S(118, 137), S( 10,  27), S(  0,   0)                           }, // Bishop
+    {S( -63,  -68), S( -5,   3), S(100,  81), S(132, 118), S(-246, -244)            }, // Rook
+    {S(-210, -211), S( 37,  14), S(147, 141), S(161, 105), S(-158, -174), S(-9,-31) }  // Queen
    };
  
-  constexpr int QuadraticTheirs[][PIECE_TYPE_NB] = {
+  constexpr Score QuadraticTheirs[][PIECE_TYPE_NB] = {
      //           THEIR PIECES
      // pair pawn knight bishop rook queen
-    {                                   }, // Bishop pair
-    {  36,                              }, // Pawn
-    {   9,   63,                        }, // Knight      OUR PIECES
-    {  59,   65,  42,                   }, // Bishop
-    {  46,   39,  24,   -24,            }, // Rook
-    {  97,  100, -42,   137,  268,      }  // Queen
+    {                                                                               }, // Bishop pair
+    {S(  33,  30)                                                                   }, // Pawn
+    {S(  46,  18), S(106,  84)                                                      }, // Knight      OUR PIECES
+    {S(  75,  35), S( 59,  44), S( 60,  15)                                         }, // Bishop
+    {S(  26,  35), S(  6,  22), S( 38,  39), S(-12,  -2)                            }, // Rook
+    {S(  97,  93), S(100, 163), S(-58, -91), S(112, 192), S(276, 225)               }  // Queen
    };
  
+  #undef S
+
    // Endgame evaluation and scaling functions are accessed directly and not through
    // the function maps because they correspond to more than one material hash key.
    Endgame<KXK>    EvaluateKXK[] = { Endgame<KXK>(WHITE),    Endgame<KXK>(BLACK) };
@@ -82,11 +85,11 @@ namespace {
    /// piece type for both colors.
  
    template<Color Us>
-  int imbalance(const int pieceCount[][PIECE_TYPE_NB]) {
+  Score imbalance(const int pieceCount[][PIECE_TYPE_NB]) {
  
      constexpr Color Them = ~Us;
  
-    int bonus = 0;
+    Score bonus = SCORE_ZERO;
  
      // Second-degree polynomial material imbalance, by Tord Romstad
      for (int pt1 = NO_PIECE_TYPE; pt1 <= QUEEN; ++pt1)
@@ -213,7 +216,7 @@ Entry* probe(const Position& pos) {
    { pos.count<BISHOP>(BLACK) > 1, pos.count<PAWN>(BLACK), pos.count<KNIGHT>(BLACK),
      pos.count<BISHOP>(BLACK)    , pos.count<ROOK>(BLACK), pos.count<QUEEN >(BLACK) } };
  
-  e->value = int16_t((imbalance<WHITE>(pieceCount) - imbalance<BLACK>(pieceCount)) / 16);
+  e->score = (imbalance<WHITE>(pieceCount) - imbalance<BLACK>(pieceCount)) / 16;
    return e;
  }
  
diff --git a/src/material.h b/src/material.h

index 80d016551113e7bcac6eb03b6c4713bad2d5aac3..28da59dbf3303ddd0ac1319acd0948669f232948 100644 (file)
--- a/src/material.h
+++ b/src/material.h
@@ -37,8 +37,8 @@ namespace Material {
  
  struct Entry {
  
-  Score imbalance() const { return make_score(value, value); }
-  Phase game_phase() const { return gamePhase; }
+  Score imbalance() const { return score; }
+  Phase game_phase() const { return (Phase)gamePhase; }
    bool specialized_eval_exists() const { return evaluationFunction != nullptr; }
    Value evaluate(const Position& pos) const { return (*evaluationFunction)(pos); }
  
@@ -57,9 +57,9 @@ struct Entry {
    const EndgameBase<Value>* evaluationFunction;
    const EndgameBase<ScaleFactor>* scalingFunction[COLOR_NB]; // Could be one for each
                                                               // side (e.g. KPKP, KBPsK)
-  int16_t value;
+  Score score;
+  int16_t gamePhase;
    uint8_t factor[COLOR_NB];
-  Phase gamePhase;
  };
  
  typedef HashTable<Entry, 8192> Table;
diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h

index caf315b2792897df8b206c57aa718cb8331ec496..a715ca85090b8d5c3d530152768810fdd2c94da5 100644 (file)
--- a/src/nnue/layers/affine_transform.h
+++ b/src/nnue/layers/affine_transform.h
@@ -66,6 +66,53 @@ namespace Eval::NNUE::Layers {
          biases_[i] = read_little_endian<BiasType>(stream);
        for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
          weights_[i] = read_little_endian<WeightType>(stream);
+
+#if defined (USE_SSSE3)
+      // Determine whether quadruplets of weight-and-input products can be summed using 16 bits
+      // without saturation. We assume worst-case combinations of 0 and 127 for all inputs.
+      if (!stream.fail())
+      {
+          auto can_saturate = [](const WeightType* w, int idx[4]) {
+              int pSum = 0, nSum = 0;
+              for (int p = 0; p < 4; ++p)
+                  if (w[idx[p]] > 0)
+                      pSum += w[idx[p]];
+                  else
+                      nSum += w[idx[p]];
+
+              return pSum > 258 || nSum < -258;
+          };
+
+          for (IndexType i = 0; i < kOutputDimensions; ++i)
+          {
+              canSaturate16[i] = false;
+              const WeightType* w = &weights_[i * kPaddedInputDimensions];
+#if defined (USE_AVX512)
+              for (IndexType j = 0; j < (kPaddedInputDimensions & ~127) && !canSaturate16[i]; j += 128)
+                  for (int k = 0; k < 64 && !canSaturate16[i]; k += 2)
+                  {
+                      int spacing[4] = { 0, 1, 64, 65 };
+                      canSaturate16[i] = can_saturate(&w[j + k], spacing);
+                  }
+#elif defined (USE_AVX2)
+              for (IndexType j = 0; j < (kPaddedInputDimensions & ~63) && !canSaturate16[i]; j += 64)
+                  for (int k = 0; k < 32 && !canSaturate16[i]; k += 2)
+                  {
+                      int spacing[4] = { 0, 1, 32, 33 };
+                      canSaturate16[i] = can_saturate(&w[j + k], spacing);
+                  }
+#elif defined (USE_SSSE3)
+              for (IndexType j = 0; j < (kPaddedInputDimensions & ~31) && !canSaturate16[i]; j += 32)
+                  for (int k = 0; k < 16 && !canSaturate16[i]; k += 2)
+                  {
+                      int spacing[4] = { 0, 1, 16, 17 };
+                      canSaturate16[i] = can_saturate(&w[j + k], spacing);
+                  }
+#endif
+          }
+      }
+#endif
+
        return !stream.fail();
      }
  
@@ -181,13 +228,26 @@ namespace Eval::NNUE::Layers {
          return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
        };
  
-#if defined (USE_VNNI)
        [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
+#if defined (USE_VNNI)
          acc = _mm512_dpbusd_epi32(acc, a, b);
  #else
-      [[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
          __m512i product0 = _mm512_maddubs_epi16(a, b);
-        return _mm512_madd_epi16(product0, kOnes512);
+        product0 = _mm512_madd_epi16(product0, kOnes512);
+        acc = _mm512_add_epi32(acc, product0);
+#endif
+      };
+
+      [[maybe_unused]] auto m512_add_dpbusd_epi32x2 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1) {
+#if defined (USE_VNNI)
+        acc = _mm512_dpbusd_epi32(acc, a0, b0);
+        acc = _mm512_dpbusd_epi32(acc, a1, b1);
+#else
+        __m512i product0 = _mm512_maddubs_epi16(a0, b0);
+        __m512i product1 = _mm512_maddubs_epi16(a1, b1);
+        product0 = _mm512_adds_epi16(product0, product1);
+        product0 = _mm512_madd_epi16(product0, kOnes512);
+        acc = _mm512_add_epi32(acc, product0);
  #endif
        };
  
@@ -214,13 +274,27 @@ namespace Eval::NNUE::Layers {
  
          return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
        };
-#if defined (USE_VNNI)
+
        [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
+#if defined (USE_VNNI)
          acc = _mm256_dpbusd_epi32(acc, a, b);
  #else
-      [[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
          __m256i product0 = _mm256_maddubs_epi16(a, b);
-        return _mm256_madd_epi16(product0, kOnes256);
+        product0 = _mm256_madd_epi16(product0, kOnes256);
+        acc = _mm256_add_epi32(acc, product0);
+#endif
+      };
+
+      [[maybe_unused]] auto m256_add_dpbusd_epi32x2 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1) {
+#if defined (USE_VNNI)
+        acc = _mm256_dpbusd_epi32(acc, a0, b0);
+        acc = _mm256_dpbusd_epi32(acc, a1, b1);
+#else
+        __m256i product0 = _mm256_maddubs_epi16(a0, b0);
+        __m256i product1 = _mm256_maddubs_epi16(a1, b1);
+        product0 = _mm256_adds_epi16(product0, product1);
+        product0 = _mm256_madd_epi16(product0, kOnes256);
+        acc = _mm256_add_epi32(acc, product0);
  #endif
        };
  
@@ -245,9 +319,18 @@ namespace Eval::NNUE::Layers {
          return _mm_add_epi32(sum0, bias);
        };
  
-      [[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
+      [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
          __m128i product0 = _mm_maddubs_epi16(a, b);
-        return _mm_madd_epi16(product0, kOnes128);
+        product0 = _mm_madd_epi16(product0, kOnes128);
+        acc = _mm_add_epi32(acc, product0);
+      };
+
+      [[maybe_unused]] auto m128_add_dpbusd_epi32x2 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1) {
+        __m128i product0 = _mm_maddubs_epi16(a0, b0);
+        __m128i product1 = _mm_maddubs_epi16(a1, b1);
+        product0 = _mm_adds_epi16(product0, product1);
+        product0 = _mm_madd_epi16(product0, kOnes128);
+        acc = _mm_add_epi32(acc, product0);
        };
  
  #endif
@@ -291,6 +374,15 @@ namespace Eval::NNUE::Layers {
            const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
            __m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
  
+          __m512i sum01a = _mm512_setzero_si512();
+          __m512i sum23a = _mm512_setzero_si512();
+          __m512i sum45a = _mm512_setzero_si512();
+          __m512i sum67a = _mm512_setzero_si512();
+          __m512i sum01b = _mm512_setzero_si512();
+          __m512i sum23b = _mm512_setzero_si512();
+          __m512i sum45b = _mm512_setzero_si512();
+          __m512i sum67b = _mm512_setzero_si512();
+
            const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
            const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
            const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
@@ -303,16 +395,6 @@ namespace Eval::NNUE::Layers {
            const __m256i in256 = input_vector256[0];
            const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
  
-#if defined (USE_VNNI)
-          __m512i sum01a = _mm512_setzero_si512();
-          __m512i sum23a = _mm512_setzero_si512();
-          __m512i sum45a = _mm512_setzero_si512();
-          __m512i sum67a = _mm512_setzero_si512();
-          __m512i sum01b = _mm512_setzero_si512();
-          __m512i sum23b = _mm512_setzero_si512();
-          __m512i sum45b = _mm512_setzero_si512();
-          __m512i sum67b = _mm512_setzero_si512();
-
            m512_add_dpbusd_epi32(sum01a, in, row01a);
            m512_add_dpbusd_epi32(sum23a, in, row23a);
            m512_add_dpbusd_epi32(sum45a, in, row45a);
@@ -321,16 +403,6 @@ namespace Eval::NNUE::Layers {
            m512_add_dpbusd_epi32(sum23b, in, row23b);
            m512_add_dpbusd_epi32(sum45b, in, row45b);
            m512_add_dpbusd_epi32(sum67b, in, row67b);
-#else
-          __m512i sum01a = m512_dpbusd_epi32(in, row01a);
-          __m512i sum23a = m512_dpbusd_epi32(in, row23a);
-          __m512i sum45a = m512_dpbusd_epi32(in, row45a);
-          __m512i sum67a = m512_dpbusd_epi32(in, row67a);
-          __m512i sum01b = m512_dpbusd_epi32(in, row01b);
-          __m512i sum23b = m512_dpbusd_epi32(in, row23b);
-          __m512i sum45b = m512_dpbusd_epi32(in, row45b);
-          __m512i sum67b = m512_dpbusd_epi32(in, row67b);
-#endif
  
            *outptr = m512_hadd256x16(
              sum01a, sum23a, sum45a, sum67a,
@@ -351,80 +423,62 @@ namespace Eval::NNUE::Layers {
  
            if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
            {
-            const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
-            const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
-            const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
-            const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
-
-#if defined (USE_VNNI)
              __m512i sum0 = _mm512_setzero_si512();
              __m512i sum1 = _mm512_setzero_si512();
              __m512i sum2 = _mm512_setzero_si512();
              __m512i sum3 = _mm512_setzero_si512();
-            const IndexType kStart = 0;
-#else
-            __m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
-            __m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
-            __m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
-            __m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
-            const IndexType kStart = 1;
-#endif
  
-            for (IndexType j = kStart; j < kNumChunks512; ++j)
+            const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
+            const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
+            const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
+            const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
+
+            int j = 0;
+            if (!canSaturate16x4[i / 4])
+            {
+                for (; j < (int)kNumChunks512 - 1; j += 2)
+                {
+                    const __m512i in0 = input_vector512[j];
+                    const __m512i in1 = input_vector512[j + 1];
+
+                    m512_add_dpbusd_epi32x2(sum0, in0, row0[j], in1, row0[j + 1]);
+                    m512_add_dpbusd_epi32x2(sum1, in0, row1[j], in1, row1[j + 1]);
+                    m512_add_dpbusd_epi32x2(sum2, in0, row2[j], in1, row2[j + 1]);
+                    m512_add_dpbusd_epi32x2(sum3, in0, row3[j], in1, row3[j + 1]);
+                }
+            }
+            for (; j < (int)kNumChunks512; ++j)
              {
                const __m512i in = input_vector512[j];
  
-#if defined (USE_VNNI)
                m512_add_dpbusd_epi32(sum0, in, row0[j]);
                m512_add_dpbusd_epi32(sum1, in, row1[j]);
                m512_add_dpbusd_epi32(sum2, in, row2[j]);
                m512_add_dpbusd_epi32(sum3, in, row3[j]);
-#else
-              sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
-              sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
-              sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
-              sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
-#endif
              }
  
              *outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
            }
            else
            {
-            const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
-            const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
-            const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
-            const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
-
-#if defined (USE_VNNI)
              __m256i sum0 = _mm256_setzero_si256();
              __m256i sum1 = _mm256_setzero_si256();
              __m256i sum2 = _mm256_setzero_si256();
              __m256i sum3 = _mm256_setzero_si256();
-            const IndexType kStart = 0;
-#else
-            __m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
-            __m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
-            __m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
-            __m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
-            const IndexType kStart = 1;
-#endif
  
-            for (IndexType j = kStart; j < kNumChunks256; ++j)
+            const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
+            const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
+            const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
+            const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
+
+            for (IndexType j = 0; j < kNumChunks256; ++j)
              {
                const __m256i in = input_vector256[j];
  
-#if defined (USE_VNNI)
                m256_add_dpbusd_epi32(sum0, in, row0[j]);
                m256_add_dpbusd_epi32(sum1, in, row1[j]);
                m256_add_dpbusd_epi32(sum2, in, row2[j]);
                m256_add_dpbusd_epi32(sum3, in, row3[j]);
-#else
-              sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
-              sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
-              sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
-              sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
-#endif
              }
  
              *outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -435,50 +489,30 @@ namespace Eval::NNUE::Layers {
        {
          if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
          {
-          const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
-
-#if defined (USE_VNNI)
            __m512i sum0 = _mm512_setzero_si512();
-          const IndexType kStart = 0;
-#else
-          __m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
-          const IndexType kStart = 1;
-#endif
  
-          for (IndexType j = kStart; j < kNumChunks512; ++j)
+          const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
+
+          for (IndexType j = 0; j < kNumChunks512; ++j)
            {
              const __m512i in = input_vector512[j];
  
-#if defined (USE_VNNI)
              m512_add_dpbusd_epi32(sum0, in, row0[j]);
-#else
-            sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
-#endif
            }
  
            output[0] = m512_hadd(sum0, biases_[0]);
          }
          else
          {
-          const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
-
-#if defined (USE_VNNI)
            __m256i sum0 = _mm256_setzero_si256();
-          const IndexType kStart = 0;
-#else
-          __m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
-          const IndexType kStart = 1;
-#endif
  
-          for (IndexType j = kStart; j < kNumChunks256; ++j)
+          const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
+
+          for (IndexType j = 0; j < kNumChunks256; ++j)
            {
              const __m256i in = input_vector256[j];
  
-#if defined (USE_VNNI)
              m256_add_dpbusd_epi32(sum0, in, row0[j]);
-#else
-            sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
-#endif
            }
  
            output[0] = m256_hadd(sum0, biases_[0]);
@@ -512,40 +546,38 @@ namespace Eval::NNUE::Layers {
            const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
            __m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
  
-          const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
-          const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
-          const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
-          const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
-
-#if defined (USE_VNNI)
            __m256i sum0 = _mm256_setzero_si256();
            __m256i sum1 = _mm256_setzero_si256();
            __m256i sum2 = _mm256_setzero_si256();
            __m256i sum3 = _mm256_setzero_si256();
-          const IndexType kStart = 0;
-#else
-          __m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
-          __m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
-          __m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
-          __m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
-          const IndexType kStart = 1;
-#endif
  
-          for (IndexType j = kStart; j < kNumChunks; ++j)
+          const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
+          const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
+          const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
+          const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
+
+          int j = 0;
+          if (!canSaturate16x4[i / 4])
            {
-            const __m256i in = input_vector[j];
+              for (; j < (int)kNumChunks - 1; j += 2)
+              {
+                  const __m256i in0 = input_vector[j];
+                  const __m256i in1 = input_vector[j + 1];
+
+                  m256_add_dpbusd_epi32x2(sum0, in0, row0[j], in1, row0[j + 1]);
+                  m256_add_dpbusd_epi32x2(sum1, in0, row1[j], in1, row1[j + 1]);
+                  m256_add_dpbusd_epi32x2(sum2, in0, row2[j], in1, row2[j + 1]);
+                  m256_add_dpbusd_epi32x2(sum3, in0, row3[j], in1, row3[j + 1]);
+              }
+          }
+          for (; j < (int)kNumChunks; ++j)
+          {
+                const __m256i in = input_vector[j];
  
-#if defined (USE_VNNI)
-            m256_add_dpbusd_epi32(sum0, in, row0[j]);
-            m256_add_dpbusd_epi32(sum1, in, row1[j]);
-            m256_add_dpbusd_epi32(sum2, in, row2[j]);
-            m256_add_dpbusd_epi32(sum3, in, row3[j]);
-#else
-            sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
-            sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
-            sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
-            sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
-#endif
+                m256_add_dpbusd_epi32(sum0, in, row0[j]);
+                m256_add_dpbusd_epi32(sum1, in, row1[j]);
+                m256_add_dpbusd_epi32(sum2, in, row2[j]);
+                m256_add_dpbusd_epi32(sum3, in, row3[j]);
            }
  
            *outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -553,25 +585,15 @@ namespace Eval::NNUE::Layers {
        }
        else if constexpr (kOutputDimensions == 1)
        {
-        const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
-
-#if defined (USE_VNNI)
          __m256i sum0 = _mm256_setzero_si256();
-        const IndexType kStart = 0;
-#else
-        __m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
-        const IndexType kStart = 1;
-#endif
  
-        for (IndexType j = kStart; j < kNumChunks; ++j)
+        const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
+
+        for (IndexType j = 0; j < kNumChunks; ++j)
          {
-          const __m256i in = input_vector[j];
+            const __m256i in = input_vector[j];
  
-#if defined (USE_VNNI)
-          m256_add_dpbusd_epi32(sum0, in, row0[j]);
-#else
-          sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
-#endif
+            m256_add_dpbusd_epi32(sum0, in, row0[j]);
          }
  
          output[0] = m256_hadd(sum0, biases_[0]);
@@ -604,24 +626,38 @@ namespace Eval::NNUE::Layers {
            const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
            __m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
  
+          __m128i sum0 = _mm_setzero_si128();
+          __m128i sum1 = _mm_setzero_si128();
+          __m128i sum2 = _mm_setzero_si128();
+          __m128i sum3 = _mm_setzero_si128();
+
            const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
            const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
            const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
            const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
  
-          __m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
-          __m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
-          __m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
-          __m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
-
-          for (int j = 1; j < (int)kNumChunks; ++j)
+          int j = 0;
+          if (!canSaturate16x4[i / 4])
+          {
+              for (; j < (int)kNumChunks - 1; j += 2)
+              {
+                  const __m128i in0 = input_vector[j];
+                  const __m128i in1 = input_vector[j + 1];
+
+                  m128_add_dpbusd_epi32x2(sum0, in0, row0[j], in1, row0[j + 1]);
+                  m128_add_dpbusd_epi32x2(sum1, in0, row1[j], in1, row1[j + 1]);
+                  m128_add_dpbusd_epi32x2(sum2, in0, row2[j], in1, row2[j + 1]);
+                  m128_add_dpbusd_epi32x2(sum3, in0, row3[j], in1, row3[j + 1]);
+              }
+          }
+          for (; j < (int)kNumChunks; ++j)
            {
-            const __m128i in = input_vector[j];
+              const __m128i in = input_vector[j];
  
-            sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
-            sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
-            sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
-            sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
+              m128_add_dpbusd_epi32(sum0, in, row0[j]);
+              m128_add_dpbusd_epi32(sum1, in, row1[j]);
+              m128_add_dpbusd_epi32(sum2, in, row2[j]);
+              m128_add_dpbusd_epi32(sum3, in, row3[j]);
            }
  
            *outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
@@ -629,12 +665,16 @@ namespace Eval::NNUE::Layers {
        }
        else if constexpr (kOutputDimensions == 1)
        {
+        __m128i sum0 = _mm_setzero_si128();
+
          const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
  
-        __m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
+        for (int j = 0; j < (int)kNumChunks; ++j)
+        {
+          const __m128i in = input_vector[j];
  
-        for (int j = 1; j < (int)kNumChunks; ++j)
-          sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
+          m128_add_dpbusd_epi32(sum0, in, row0[j]);
+        }
  
          output[0] = m128_hadd(sum0, biases_[0]);
        }
@@ -680,9 +720,8 @@ namespace Eval::NNUE::Layers {
          for (IndexType j = 0; j < kNumChunks; ++j) {
            __m128i row_j = _mm_load_si128(&row[j]);
            __m128i input_j = _mm_load_si128(&input_vector[j]);
-          __m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j);
-          __m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs);
-          __m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs);
+          __m128i extended_row_lo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
+          __m128i extended_row_hi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
            __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
            __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
            __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
@@ -704,9 +743,8 @@ namespace Eval::NNUE::Layers {
          for (IndexType j = 0; j < kNumChunks; ++j) {
            __m64 row_j = row[j];
            __m64 input_j = input_vector[j];
-          __m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j);
-          __m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs);
-          __m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs);
+          __m64 extended_row_lo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
+          __m64 extended_row_hi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
            __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
            __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
            __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
@@ -753,8 +791,11 @@ namespace Eval::NNUE::Layers {
      PreviousLayer previous_layer_;
  
      alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
-    alignas(kCacheLineSize)
-        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
+    alignas(kCacheLineSize) WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
+    union {
+        uint32_t canSaturate16x4[(kOutputDimensions + 3) / 4];
+        bool canSaturate16[kOutputDimensions];
+    };
    };
  
  }  // namespace Eval::NNUE::Layers
diff --git a/src/pawns.cpp b/src/pawns.cpp

index b6d2900305aa25e3119ff067a670f194f6a78090..16dbf27a5b105c3f8364d197b3913ac5f337ddfd 100644 (file)
--- a/src/pawns.cpp
+++ b/src/pawns.cpp
@@ -66,6 +66,7 @@ namespace {
      { V(-17), V( -13), V( 100), V(  4), V(  9), V(-16), V(-31) }
    };
  
+
    // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
    // for king when the king is on a semi-open or open file.
    constexpr Score KingOnFile[2][2] = {{ S(-19,12), S(-6, 7)  },
diff --git a/src/search.cpp b/src/search.cpp

index 52541868b875c0e65d11bb1063e0982caa93fc51..cdbccb4c0abbc395f27465a33e6675ba133817d6 100644 (file)
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -676,6 +676,7 @@ namespace {
          ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
      formerPv = ss->ttPv && !PvNode;
  
+    // Update low ply history for previous move if we are near root and position is or has been in PV
      if (   ss->ttPv
          && depth > 12
          && ss->ply - 1 < MAX_LPH
@@ -700,6 +701,7 @@ namespace {
          {
              if (ttValue >= beta)
              {
+                // Bonus for a quiet ttMove that fails high
                  if (!pos.capture_or_promotion(ttMove))
                      update_quiet_stats(pos, ss, ttMove, stat_bonus(depth), depth);
  
@@ -716,6 +718,8 @@ namespace {
              }
          }
  
+        // Partial workaround for the graph history interaction problem:
+        // for high rule50 counts, don't produce transposition table cutoffs.
          if (pos.rule50_count() < 90)
              return ttValue;
      }
@@ -789,6 +793,7 @@ namespace {
          if (eval == VALUE_NONE)
              ss->staticEval = eval = evaluate(pos);
  
+        // Randomize draw evaluation
          if (eval == VALUE_DRAW)
              eval = value_draw(thisThread);
  
@@ -799,20 +804,33 @@ namespace {
      }
      else
      {
+        // After a null move, reuse the previous static eval with the sign flipped
+        // and two tempos added.
          if ((ss-1)->currentMove != MOVE_NULL)
              ss->staticEval = eval = evaluate(pos);
          else
              ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
  
+        // Save static evaluation into transposition table
          tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
      }
  
+    if ((ss-1)->moveCount > 1 && is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture && depth < 7)
+    {
+        int bonus = std::clamp(- (depth+1) * 2 * int((ss-1)->staticEval + ss->staticEval - 2 * Tempo), -1000, 1000);
+        thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus;
+    }
+
      // Step 7. Razoring (~1 Elo)
      if (   !rootNode // The required rootNode PV handling is not available in qsearch
          &&  depth == 1
          &&  eval <= alpha - RazorMargin)
          return qsearch<NT>(pos, ss, alpha, beta);
  
+    // Set up the improving flag that is used in various pruning heuristics.
+    // We define a position as improving if its static evaluation is better
+    // than the previous static evaluation at our turn.
+    // In case we were in check at our previous move, we look at the move prior to it.
      improving =  (ss-2)->staticEval == VALUE_NONE
                 ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE
                 : ss->staticEval > (ss-2)->staticEval;
@@ -1170,9 +1188,10 @@ moves_loop: // When in check, search starts from here
                r -= 2;
  
            // Increase reduction at root and non-PV nodes when the best move does not change frequently
-          if ((rootNode || !PvNode) && depth > 10 && thisThread->bestMoveChanges <= 2)
+          if ((rootNode || !PvNode) && thisThread->rootDepth > 10 && thisThread->bestMoveChanges <= 2)
                r++;
  
+          // More reductions for late moves if position was not in previous PV
            if (moveCountPruning && !formerPv)
                r++;
  
@@ -1248,6 +1267,7 @@ moves_loop: // When in check, search starts from here
        {
            value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
  
+          // If the move passed LMR update its stats
            if (didLMR && !captureOrPromotion)
            {
                int bonus = value > alpha ?  stat_bonus(newDepth)
@@ -1299,8 +1319,7 @@ moves_loop: // When in check, search starts from here
                    rm.pv.push_back(*m);
  
                // We record how often the best move has been changed in each
-              // iteration. This information is used for time management: when
-              // the best move changes frequently, we allocate some more time.
+              // iteration. This information is used for time management and LMR.
                if (moveCount > 1)
                    ++thisThread->bestMoveChanges;
            }
@@ -1333,6 +1352,7 @@ moves_loop: // When in check, search starts from here
            }
        }
  
+      // If the move is worse than some previously searched move, remember it to update its stats later
        if (move != bestMove)
        {
            if (captureOrPromotion && captureCount < 32)
@@ -1362,6 +1382,7 @@ moves_loop: // When in check, search starts from here
          bestValue = excludedMove ? alpha
                     :     ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW;
  
+    // If there is a move which produces search value greater than alpha we update stats of searched moves
      else if (bestMove)
          update_all_stats(pos, ss, bestMove, bestValue, beta, prevSq,
                           quietsSearched, quietCount, capturesSearched, captureCount, depth);
@@ -1383,6 +1404,7 @@ moves_loop: // When in check, search starts from here
      else if (depth > 3)
          ss->ttPv = ss->ttPv && (ss+1)->ttPv;
  
+    // Write gathered information in transposition table
      if (!excludedMove && !(rootNode && thisThread->pvIdx))
          tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
                    bestValue >= beta ? BOUND_LOWER :
@@ -1478,6 +1500,8 @@ moves_loop: // When in check, search starts from here
                  bestValue = ttValue;
          }
          else
+            // After a null move, reuse the previous static eval with the sign flipped
+            // and two tempos added.
              ss->staticEval = bestValue =
              (ss-1)->currentMove != MOVE_NULL ? evaluate(pos)
                                               : -(ss-1)->staticEval + 2 * Tempo;
@@ -1485,6 +1509,7 @@ moves_loop: // When in check, search starts from here
          // Stand pat. Return immediately if static value is at least beta
          if (bestValue >= beta)
          {
+            // Save gathered info in transposition table
              if (!ss->ttHit)
                  tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
                            DEPTH_NONE, MOVE_NONE, ss->staticEval);
@@ -1612,6 +1637,7 @@ moves_loop: // When in check, search starts from here
          return mated_in(ss->ply); // Plies to mate from the root
      }
  
+    // Save gathered info in transposition table
      tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit,
                bestValue >= beta ? BOUND_LOWER :
                PvNode && bestValue > oldAlpha  ? BOUND_EXACT : BOUND_UPPER,
@@ -1695,9 +1721,10 @@ moves_loop: // When in check, search starts from here
  
      if (!pos.capture_or_promotion(bestMove))
      {
+        // Increase stats for the best move in case it was a quiet move
          update_quiet_stats(pos, ss, bestMove, bonus2, depth);
  
-        // Decrease all the non-best quiet moves
+        // Decrease stats for all non-best quiet moves
          for (int i = 0; i < quietCount; ++i)
          {
              thisThread->mainHistory[us][from_to(quietsSearched[i])] << -bonus2;
@@ -1705,14 +1732,16 @@ moves_loop: // When in check, search starts from here
          }
      }
      else
+        // Increase stats for the best move in case it was a capture move
          captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1;
  
-    // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted
+    // Extra penalty for a quiet early move that was not a TT move or
+    // main killer move in previous ply when it gets refuted.
      if (   ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0]))
          && !pos.captured_piece())
              update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1);
  
-    // Decrease all the non-best capture moves
+    // Decrease stats for all non-best capture moves
      for (int i = 0; i < captureCount; ++i)
      {
          moved_piece = pos.moved_piece(capturesSearched[i]);
@@ -1729,6 +1758,7 @@ moves_loop: // When in check, search starts from here
  
      for (int i : {1, 2, 4, 6})
      {
+        // Only update first 2 continuation histories if we are in check
          if (ss->inCheck && i > 2)
              break;
          if (is_ok((ss-i)->currentMove))
@@ -1741,6 +1771,7 @@ moves_loop: // When in check, search starts from here
  
    void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus, int depth) {
  
+    // Update killers
      if (ss->killers[0] != move)
      {
          ss->killers[1] = ss->killers[0];
@@ -1752,15 +1783,18 @@ moves_loop: // When in check, search starts from here
      thisThread->mainHistory[us][from_to(move)] << bonus;
      update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus);
  
+    // Penalty for reversed move in case of moved piece not being a pawn
      if (type_of(pos.moved_piece(move)) != PAWN)
          thisThread->mainHistory[us][from_to(reverse_move(move))] << -bonus;
  
+    // Update countermove history
      if (is_ok((ss-1)->currentMove))
      {
          Square prevSq = to_sq((ss-1)->currentMove);
          thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move;
      }
  
+    // Update low ply history
      if (depth > 11 && ss->ply < MAX_LPH)
          thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
    }
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Mon, 14 Dec 2020 23:33:25 +0000 (00:33 +0100)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Mon, 14 Dec 2020 23:33:25 +0000 (00:33 +0100)
src/evaluate.cpp		patch \| blob \| history
src/material.cpp		patch \| blob \| history
src/material.h		patch \| blob \| history
src/nnue/layers/affine_transform.h		patch \| blob \| history
src/pawns.cpp		patch \| blob \| history
src/search.cpp		patch \| blob \| history