SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
- nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp
+ nnue/evaluate_nnue.cpp nnue/features/half_ka_v2.cpp
OBJS = $(notdir $(SRCS:.cpp=.o))
if (filename.has_value())
actualFilename = filename.value();
- else
+ else
{
if (eval_file_loaded != EvalFileDefaultName)
{
// Scale and shift NNUE for compatibility with search and classical evaluation
auto adjusted_NNUE = [&]()
{
- int material = pos.non_pawn_material() + 4 * PawnValueMg * pos.count<PAWN>();
- int scale = 580
- + material / 32
- - 4 * pos.rule50_count();
+
+ int scale = 903 + 28 * pos.count<PAWN>() + 28 * pos.non_pawn_material() / 1024;
Value nnue = NNUE::evaluate(pos) * scale / 1024 + Time.tempoNNUE;
Value psq = Value(abs(eg_value(pos.psq_score())));
int r50 = 16 + pos.rule50_count();
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
- bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
+ bool classical = largePsq;
// Use classical evaluation for really low piece endgames.
// One critical case is the draw for bishop + A/H file pawn vs naked king.
&& !lowPieceEndgame
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|| ( pos.opposite_bishops()
- && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
- && !(pos.this_thread()->nodes & 0xB))))
+ && abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50)))
v = adjusted_NNUE();
}
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
- #define EvalFileDefaultName "nn-62ef826d1a6d.nnue"
+ #define EvalFileDefaultName "nn-8a08400ed089.nnue"
namespace NNUE {
LargePagePtr<FeatureTransformer> featureTransformer;
// Evaluation function
- AlignedPtr<Network> network;
+ AlignedPtr<Network> network[LayerStacks];
// Evaluation function file name
std::string fileName;
void initialize() {
Detail::initialize(featureTransformer);
- Detail::initialize(network);
+ for (std::size_t i = 0; i < LayerStacks; ++i)
+ Detail::initialize(network[i]);
}
// Read network header
std::uint32_t version, size;
version = read_little_endian<std::uint32_t>(stream);
- *hashValue = read_little_endian<std::uint32_t>(stream);
+ *hashValue = read_little_endian<std::uint32_t>(stream);
size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != Version) return false;
desc->resize(size);
if (!read_header(stream, &hashValue, &netDescription)) return false;
if (hashValue != HashValue) return false;
if (!Detail::read_parameters(stream, *featureTransformer)) return false;
- if (!Detail::read_parameters(stream, *network)) return false;
+ for (std::size_t i = 0; i < LayerStacks; ++i)
+ if (!Detail::read_parameters(stream, *(network[i]))) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
if (!write_header(stream, HashValue, netDescription)) return false;
if (!Detail::write_parameters(stream, *featureTransformer)) return false;
- if (!Detail::write_parameters(stream, *network)) return false;
+ for (std::size_t i = 0; i < LayerStacks; ++i)
+ if (!Detail::write_parameters(stream, *(network[i]))) return false;
return (bool)stream;
}
ASSERT_ALIGNED(transformedFeatures, alignment);
ASSERT_ALIGNED(buffer, alignment);
- featureTransformer->transform(pos, transformedFeatures);
- const auto output = network->propagate(transformedFeatures, buffer);
+ const std::size_t bucket = (pos.count<ALL_PIECES>() - 1) / 4;
- return static_cast<Value>(output[0] / OutputScale);
+ const auto [psqt, lazy] = featureTransformer->transform(pos, transformedFeatures, bucket);
+ if (lazy) {
+ return static_cast<Value>(psqt / OutputScale);
+ } else {
+ const auto output = network[bucket]->propagate(transformedFeatures, buffer);
+ return static_cast<Value>((output[0] + psqt) / OutputScale);
+ }
}
// Load eval, from a file stream or a memory stream
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-//Definition of input features HalfKP of NNUE evaluation function
+// Definition of input features HalfKAv2 of NNUE evaluation function
-#include "half_kp.h"
+#include "half_ka_v2.h"
#include "../../position.h"
namespace Stockfish::Eval::NNUE::Features {
// Orient a square according to perspective (rotates by 180 for black)
- inline Square HalfKP::orient(Color perspective, Square s) {
- return Square(int(s) ^ (bool(perspective) * 63));
+ inline Square HalfKAv2::orient(Color perspective, Square s) {
+ return Square(int(s) ^ (bool(perspective) * 56));
}
// Index of a feature for a given king position and another piece on some square
- inline IndexType HalfKP::make_index(Color perspective, Square s, Piece pc, Square ksq) {
+ inline IndexType HalfKAv2::make_index(Color perspective, Square s, Piece pc, Square ksq) {
return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq);
}
// Get a list of indices for active features
- void HalfKP::append_active_indices(
+ void HalfKAv2::append_active_indices(
const Position& pos,
Color perspective,
ValueListInserter<IndexType> active
) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
- Bitboard bb = pos.pieces() & ~pos.pieces(KING);
+ Bitboard bb = pos.pieces();
while (bb)
{
Square s = pop_lsb(bb);
// append_changed_indices() : get a list of indices for recently changed features
- void HalfKP::append_changed_indices(
+ void HalfKAv2::append_changed_indices(
Square ksq,
StateInfo* st,
Color perspective,
Square oriented_ksq = orient(perspective, ksq);
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
- if (type_of(pc) == KING) continue;
if (dp.from[i] != SQ_NONE)
removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq));
if (dp.to[i] != SQ_NONE)
}
}
- int HalfKP::update_cost(StateInfo* st) {
+ int HalfKAv2::update_cost(StateInfo* st) {
return st->dirtyPiece.dirty_num;
}
- int HalfKP::refresh_cost(const Position& pos) {
- return pos.count<ALL_PIECES>() - 2;
+ int HalfKAv2::refresh_cost(const Position& pos) {
+ return pos.count<ALL_PIECES>();
}
- bool HalfKP::requires_refresh(StateInfo* st, Color perspective) {
+ bool HalfKAv2::requires_refresh(StateInfo* st, Color perspective) {
return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
}
//Definition of input features HalfKAv2 of NNUE evaluation function
-#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
-#define NNUE_FEATURES_HALF_KP_H_INCLUDED
+#ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
+#define NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
#include "../nnue_common.h"
namespace Stockfish::Eval::NNUE::Features {
- // Feature HalfKP: Combination of the position of own king
- // and the position of pieces other than kings
- class HalfKP {
+ // Feature HalfKAv2: Combination of the position of own king
+ // and the position of pieces
+ class HalfKAv2 {
// unique number for each piece type on each square
enum {
PS_NONE = 0,
- PS_W_PAWN = 1,
- PS_B_PAWN = 1 * SQUARE_NB + 1,
- PS_W_KNIGHT = 2 * SQUARE_NB + 1,
- PS_B_KNIGHT = 3 * SQUARE_NB + 1,
- PS_W_BISHOP = 4 * SQUARE_NB + 1,
- PS_B_BISHOP = 5 * SQUARE_NB + 1,
- PS_W_ROOK = 6 * SQUARE_NB + 1,
- PS_B_ROOK = 7 * SQUARE_NB + 1,
- PS_W_QUEEN = 8 * SQUARE_NB + 1,
- PS_B_QUEEN = 9 * SQUARE_NB + 1,
- PS_NB = 10 * SQUARE_NB + 1
+ PS_W_PAWN = 0,
+ PS_B_PAWN = 1 * SQUARE_NB,
+ PS_W_KNIGHT = 2 * SQUARE_NB,
+ PS_B_KNIGHT = 3 * SQUARE_NB,
+ PS_W_BISHOP = 4 * SQUARE_NB,
+ PS_B_BISHOP = 5 * SQUARE_NB,
+ PS_W_ROOK = 6 * SQUARE_NB,
+ PS_B_ROOK = 7 * SQUARE_NB,
+ PS_W_QUEEN = 8 * SQUARE_NB,
+ PS_B_QUEEN = 9 * SQUARE_NB,
+ PS_KING = 10 * SQUARE_NB,
+ PS_NB = 11 * SQUARE_NB
};
static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
- { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE,
- PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE },
- { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE,
- PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE }
+ { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
+ PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE },
+ { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
+ PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE }
};
// Orient a square according to perspective (rotates by 180 for black)
public:
// Feature name
- static constexpr const char* Name = "HalfKP(Friend)";
+ static constexpr const char* Name = "HalfKAv2(Friend)";
// Hash value embedded in the evaluation file
- static constexpr std::uint32_t HashValue = 0x5D69D5B8u;
+ static constexpr std::uint32_t HashValue = 0x5f234cb8u;
// Number of feature dimensions
static constexpr IndexType Dimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB);
- // Maximum number of simultaneously active features. 30 because kins are not included.
- static constexpr IndexType MaxActiveDimensions = 30;
+ // Maximum number of simultaneously active features.
+ static constexpr IndexType MaxActiveDimensions = 32;
// Get a list of indices for active features
static void append_active_indices(
} // namespace Stockfish::Eval::NNUE::Features
-#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_H_INCLUDED
if (!previousLayer.read_parameters(stream)) return false;
for (std::size_t i = 0; i < OutputDimensions; ++i)
biases[i] = read_little_endian<BiasType>(stream);
-#if !defined (USE_SSSE3)
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+#if !defined (USE_SSSE3)
weights[i] = read_little_endian<WeightType>(stream);
#else
- std::unique_ptr<uint32_t[]> indexMap = std::make_unique<uint32_t[]>(OutputDimensions * PaddedInputDimensions);
- for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
- const uint32_t scrambledIdx =
+ weights[
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
i / PaddedInputDimensions * 4 +
- i % 4;
- weights[scrambledIdx] = read_little_endian<WeightType>(stream);
- indexMap[scrambledIdx] = i;
- }
-
- // Determine if eights of weight and input products can be summed using 16bits
- // without saturation. We assume worst case combinations of 0 and 127 for all inputs.
- if (OutputDimensions > 1 && !stream.fail())
- {
- canSaturate16.count = 0;
-#if !defined(USE_VNNI)
- for (IndexType i = 0; i < PaddedInputDimensions; i += 16)
- for (IndexType j = 0; j < OutputDimensions; ++j)
- for (int x = 0; x < 2; ++x)
- {
- WeightType* w = &weights[i * OutputDimensions + j * 4 + x * 2];
- int sum[2] = {0, 0};
- for (int k = 0; k < 8; ++k)
- {
- IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
- sum[w[idx] < 0] += w[idx];
- }
- for (int sign : { -1, 1 })
- while (sign * sum[sign == -1] > 258)
- {
- int maxK = 0, maxW = 0;
- for (int k = 0; k < 8; ++k)
- {
- IndexType idx = k / 2 * OutputDimensions * 4 + k % 2;
- if (maxW < sign * w[idx])
- maxK = k, maxW = sign * w[idx];
- }
-
- IndexType idx = maxK / 2 * OutputDimensions * 4 + maxK % 2;
- sum[sign == -1] -= w[idx];
- const uint32_t scrambledIdx = idx + i * OutputDimensions + j * 4 + x * 2;
- canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx], indexMap[scrambledIdx]);
- w[idx] = 0;
- }
- }
-
- // Non functional optimization for faster more linear access
- std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count,
- [](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2)
- { return e1.in == e2.in ? e1.out < e2.out : e1.in < e2.in; });
-#endif
- }
+ i % 4
+ ] = read_little_endian<WeightType>(stream);
#endif
return !stream.fail();
i % 4
];
}
- for (int i = 0; i < canSaturate16.count; ++i)
- unscrambledWeights[canSaturate16.ids[i].wIdx] = canSaturate16.ids[i].w;
for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
write_little_endian<WeightType>(stream, unscrambledWeights[i]);
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
__m512i product2 = _mm512_maddubs_epi16(a2, b2);
__m512i product3 = _mm512_maddubs_epi16(a3, b3);
- product0 = _mm512_add_epi16(product0, product1);
- product2 = _mm512_add_epi16(product2, product3);
- product0 = _mm512_add_epi16(product0, product2);
+ product0 = _mm512_adds_epi16(product0, product1);
product0 = _mm512_madd_epi16(product0, Ones512);
- acc = _mm512_add_epi32(acc, product0);
+ product2 = _mm512_adds_epi16(product2, product3);
+ product2 = _mm512_madd_epi16(product2, Ones512);
+ acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product2));
#endif
};
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
__m256i product2 = _mm256_maddubs_epi16(a2, b2);
__m256i product3 = _mm256_maddubs_epi16(a3, b3);
- product0 = _mm256_add_epi16(product0, product1);
- product2 = _mm256_add_epi16(product2, product3);
- product0 = _mm256_add_epi16(product0, product2);
+ product0 = _mm256_adds_epi16(product0, product1);
product0 = _mm256_madd_epi16(product0, Ones256);
- acc = _mm256_add_epi32(acc, product0);
+ product2 = _mm256_adds_epi16(product2, product3);
+ product2 = _mm256_madd_epi16(product2, Ones256);
+ acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product2));
#endif
};
__m128i product1 = _mm_maddubs_epi16(a1, b1);
__m128i product2 = _mm_maddubs_epi16(a2, b2);
__m128i product3 = _mm_maddubs_epi16(a3, b3);
- product0 = _mm_add_epi16(product0, product1);
- product2 = _mm_add_epi16(product2, product3);
- product0 = _mm_add_epi16(product0, product2);
+ product0 = _mm_adds_epi16(product0, product1);
product0 = _mm_madd_epi16(product0, Ones128);
- acc = _mm_add_epi32(acc, product0);
+ product2 = _mm_adds_epi16(product2, product3);
+ product2 = _mm_madd_epi16(product2, Ones128);
+ acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product2));
};
#endif
#endif
#if defined (USE_SSSE3)
+ // Different layout: we always process 4 inputs at a time.
+ static_assert(InputDimensions % 4 == 0);
const auto output = reinterpret_cast<OutputType*>(buffer);
const auto inputVector = reinterpret_cast<const vec_t*>(input);
// because then it is also an input dimension.
if constexpr (OutputDimensions % OutputSimdWidth == 0)
{
- constexpr IndexType NumChunks = PaddedInputDimensions / 4;
+ constexpr IndexType NumChunks = InputDimensions / 4;
const auto input32 = reinterpret_cast<const std::int32_t*>(input);
vec_t* outptr = reinterpret_cast<vec_t*>(output);
for (int j = 0; j * OutputSimdWidth < OutputDimensions; ++j)
vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
}
- for (int i = 0; i < canSaturate16.count; ++i)
- output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w;
}
else if constexpr (OutputDimensions == 1)
{
auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_SSE2)
- constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
+ // At least a multiple of 16, with SSE2.
+ static_assert(InputDimensions % SimdWidth == 0);
+ constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m128i Zeros = _mm_setzero_si128();
const auto inputVector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
- constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
+ static_assert(InputDimensions % SimdWidth == 0);
+ constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const __m64 Zeros = _mm_setzero_si64();
const auto inputVector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON)
- constexpr IndexType NumChunks = PaddedInputDimensions / SimdWidth;
+ static_assert(InputDimensions % SimdWidth == 0);
+ constexpr IndexType NumChunks = InputDimensions / SimdWidth;
const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
#endif
alignas(CacheLineSize) BiasType biases[OutputDimensions];
alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
-#if defined (USE_SSSE3)
- struct CanSaturate {
- int count;
- struct Entry {
- uint32_t wIdx;
- uint16_t out;
- uint16_t in;
- int8_t w;
- } ids[PaddedInputDimensions * OutputDimensions * 3 / 4];
-
- void add(int i, int j, int8_t w, uint32_t wIdx) {
- ids[count].wIdx = wIdx;
- ids[count].out = i;
- ids[count].in = j;
- ids[count].w = w;
- ++count;
- }
- } canSaturate16;
-#endif
};
} // namespace Stockfish::Eval::NNUE::Layers
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
- constexpr IndexType NumChunks = InputDimensions / SimdWidth;
- const __m256i Zero = _mm256_setzero_si256();
- const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
- const auto in = reinterpret_cast<const __m256i*>(input);
- const auto out = reinterpret_cast<__m256i*>(output);
- for (IndexType i = 0; i < NumChunks; ++i) {
- const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
- _mm256_load_si256(&in[i * 4 + 0]),
- _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
- const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
- _mm256_load_si256(&in[i * 4 + 2]),
- _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
- _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
- _mm256_packs_epi16(words0, words1), Zero), Offsets));
+ if constexpr (InputDimensions % SimdWidth == 0) {
+ constexpr IndexType NumChunks = InputDimensions / SimdWidth;
+ const __m256i Zero = _mm256_setzero_si256();
+ const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+ const auto in = reinterpret_cast<const __m256i*>(input);
+ const auto out = reinterpret_cast<__m256i*>(output);
+ for (IndexType i = 0; i < NumChunks; ++i) {
+ const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
+ _mm256_load_si256(&in[i * 4 + 0]),
+ _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits);
+ const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
+ _mm256_load_si256(&in[i * 4 + 2]),
+ _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits);
+ _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
+ _mm256_packs_epi16(words0, words1), Zero), Offsets));
+ }
+ } else {
+ constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
+ const __m128i Zero = _mm_setzero_si128();
+ const auto in = reinterpret_cast<const __m128i*>(input);
+ const auto out = reinterpret_cast<__m128i*>(output);
+ for (IndexType i = 0; i < NumChunks; ++i) {
+ const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+ _mm_load_si128(&in[i * 4 + 0]),
+ _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits);
+ const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+ _mm_load_si128(&in[i * 4 + 2]),
+ _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits);
+ const __m128i packedbytes = _mm_packs_epi16(words0, words1);
+ _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero));
+ }
}
- constexpr IndexType Start = NumChunks * SimdWidth;
+ constexpr IndexType Start =
+ InputDimensions % SimdWidth == 0
+ ? InputDimensions / SimdWidth * SimdWidth
+ : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);
#elif defined(USE_SSE2)
constexpr IndexType NumChunks = InputDimensions / SimdWidth;
// Class that holds the result of affine transformation of input features
struct alignas(CacheLineSize) Accumulator {
- std::int16_t
- accumulation[2][TransformedFeatureDimensions];
+ std::int16_t accumulation[2][TransformedFeatureDimensions];
+ std::int32_t psqtAccumulation[2][PSQTBuckets];
AccumulatorState state[2];
};
#include "nnue_common.h"
-#include "features/half_kp.h"
+#include "features/half_ka_v2.h"
#include "layers/input_slice.h"
#include "layers/affine_transform.h"
namespace Stockfish::Eval::NNUE {
// Input features used in evaluation function
- using FeatureSet = Features::HalfKP;
+ using FeatureSet = Features::HalfKAv2;
// Number of input feature dimensions after conversion
- constexpr IndexType TransformedFeatureDimensions = 256;
+ constexpr IndexType TransformedFeatureDimensions = 512;
+ constexpr IndexType PSQTBuckets = 8;
+ constexpr IndexType LayerStacks = 8;
namespace Layers {
// Define network structure
using InputLayer = InputSlice<TransformedFeatureDimensions * 2>;
- using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+ using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 16>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
namespace Stockfish::Eval::NNUE {
// Version of the evaluation file
- constexpr std::uint32_t Version = 0x7AF32F16u;
+ constexpr std::uint32_t Version = 0x7AF32F20u;
// Constant used in evaluation value calculation
constexpr int OutputScale = 16;
// vector registers.
#define VECTOR
+ static_assert(PSQTBuckets == 8, "Assumed by the current choice of constants.");
+
#ifdef USE_AVX512
typedef __m512i vec_t;
+ typedef __m256i psqt_vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
+ #define vec_load_psqt(a) _mm256_load_si256(a)
+ #define vec_store_psqt(a,b) _mm256_store_si256(a,b)
+ #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
+ #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
+ #define vec_zero_psqt() _mm256_setzero_si256()
static constexpr IndexType NumRegs = 8; // only 8 are needed
+ static constexpr IndexType NumPsqtRegs = 1;
#elif USE_AVX2
typedef __m256i vec_t;
+ typedef __m256i psqt_vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
+ #define vec_load_psqt(a) _mm256_load_si256(a)
+ #define vec_store_psqt(a,b) _mm256_store_si256(a,b)
+ #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
+ #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
+ #define vec_zero_psqt() _mm256_setzero_si256()
static constexpr IndexType NumRegs = 16;
+ static constexpr IndexType NumPsqtRegs = 1;
#elif USE_SSE2
typedef __m128i vec_t;
+ typedef __m128i psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
+ #define vec_load_psqt(a) (*(a))
+ #define vec_store_psqt(a,b) *(a)=(b)
+ #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b)
+ #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b)
+ #define vec_zero_psqt() _mm_setzero_si128()
static constexpr IndexType NumRegs = Is64Bit ? 16 : 8;
+ static constexpr IndexType NumPsqtRegs = 2;
#elif USE_MMX
typedef __m64 vec_t;
+ typedef std::int32_t psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
+ #define vec_load_psqt(a) (*(a))
+ #define vec_store_psqt(a,b) *(a)=(b)
+ #define vec_add_psqt_32(a,b) a+b
+ #define vec_sub_psqt_32(a,b) a-b
+ #define vec_zero_psqt() 0
static constexpr IndexType NumRegs = 8;
+ static constexpr IndexType NumPsqtRegs = 8;
#elif USE_NEON
typedef int16x8_t vec_t;
+ typedef int32x4_t psqt_vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
+ #define vec_load_psqt(a) (*(a))
+ #define vec_store_psqt(a,b) *(a)=(b)
+ #define vec_add_psqt_32(a,b) vaddq_s32(a,b)
+ #define vec_sub_psqt_32(a,b) vsubq_s32(a,b)
+ #define vec_zero_psqt() psqt_vec_t{0}
static constexpr IndexType NumRegs = 16;
+ static constexpr IndexType NumPsqtRegs = 2;
#else
#undef VECTOR
// Number of output dimensions for one side
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
+ static constexpr int LazyThreshold = 1400;
+
#ifdef VECTOR
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
+ static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
+ static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets");
#endif
public:
biases[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < HalfDimensions * InputDimensions; ++i)
weights[i] = read_little_endian<WeightType>(stream);
+ for (std::size_t i = 0; i < PSQTBuckets * InputDimensions; ++i)
+ psqtWeights[i] = read_little_endian<PSQTWeightType>(stream);
return !stream.fail();
}
}
// Convert input features
- void transform(const Position& pos, OutputType* output) const {
+ std::pair<std::int32_t, bool> transform(const Position& pos, OutputType* output, int bucket) const {
update_accumulator(pos, WHITE);
update_accumulator(pos, BLACK);
+ const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& accumulation = pos.state()->accumulator.accumulation;
+ const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
+
+ const auto psqt = (
+ psqtAccumulation[static_cast<int>(perspectives[0])][bucket]
+ - psqtAccumulation[static_cast<int>(perspectives[1])][bucket]
+ ) / 2;
+
+ if (abs(psqt) > LazyThreshold * OutputScale)
+ return { psqt, true };
#if defined(USE_AVX512)
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
const int8x8_t Zero = {0};
#endif
- const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = HalfDimensions * p;
#if defined(USE_MMX)
_mm_empty();
#endif
+
+ return { psqt, false };
}
private:
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[NumRegs];
+ psqt_vec_t psqt[NumPsqtRegs];
#endif
// Look for a usable accumulator of an earlier position. We keep track
}
}
+ for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+ {
+ // Load accumulator
+ auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+ &st->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_load_psqt(&accTilePsqt[k]);
+
+ for (IndexType i = 0; states_to_update[i]; ++i)
+ {
+ // Difference calculation for the deactivated features
+ for (const auto index : removed[i])
+ {
+ const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+ auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
+ }
+
+ // Difference calculation for the activated features
+ for (const auto index : added[i])
+ {
+ const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+ auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
+ }
+
+ // Store accumulator
+ accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+ &states_to_update[i]->accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ vec_store_psqt(&accTilePsqt[k], psqt[k]);
+ }
+ }
+
#else
for (IndexType i = 0; states_to_update[i]; ++i)
{
std::memcpy(states_to_update[i]->accumulator.accumulation[perspective],
st->accumulator.accumulation[perspective],
HalfDimensions * sizeof(BiasType));
+
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ states_to_update[i]->accumulator.psqtAccumulation[perspective][k] = st->accumulator.psqtAccumulation[perspective][k];
+
st = states_to_update[i];
// Difference calculation for the deactivated features
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[perspective][j] -= weights[offset + j];
+
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ st->accumulator.psqtAccumulation[perspective][k] -= psqtWeights[index * PSQTBuckets + k];
}
// Difference calculation for the activated features
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[perspective][j] += weights[offset + j];
+
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ st->accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
}
#endif
vec_store(&accTile[k], acc[k]);
}
+ for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+ {
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_zero_psqt();
+
+ for (const auto index : active)
+ {
+ const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+ auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
+ }
+
+ auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
+ &accumulator.psqtAccumulation[perspective][j * PsqtTileHeight]);
+ for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+ vec_store_psqt(&accTilePsqt[k], psqt[k]);
+ }
+
#else
std::memcpy(accumulator.accumulation[perspective], biases,
HalfDimensions * sizeof(BiasType));
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ accumulator.psqtAccumulation[perspective][k] = 0;
+
for (const auto index : active)
{
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
accumulator.accumulation[perspective][j] += weights[offset + j];
+
+ for (std::size_t k = 0; k < PSQTBuckets; ++k)
+ accumulator.psqtAccumulation[perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
#endif
}
using BiasType = std::int16_t;
using WeightType = std::int16_t;
+ using PSQTWeightType = std::int32_t;
alignas(CacheLineSize) BiasType biases[HalfDimensions];
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
+ alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
};
} // namespace Stockfish::Eval::NNUE
// Futility margin
Value futility_margin(Depth d, bool improving) {
- return Value(234 * (d - improving));
+ return Value(231 * (d - improving));
}
// Reductions lookup table, initialized at startup
&& (ss-1)->statScore < 24185
&& eval >= beta
&& eval >= ss->staticEval
- && ss->staticEval >= beta - 24 * depth - 34 * improving + 162 * ss->ttPv + 159
+ && ss->staticEval >= beta - 22 * depth - 34 * improving + 162 * ss->ttPv + 159
&& !excludedMove
&& pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
+ (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)]
- - 4741;
+ - 4791;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo)
if (!ss->inCheck)