From b748b46714d5f8e0acca0a042ede1fc95e4f5190 Mon Sep 17 00:00:00 2001 From: Tomasz Sobczyk Date: Sat, 24 Apr 2021 15:08:11 +0200 Subject: [PATCH] Cleanup and simplify NNUE code. A lot of optimizations happened since the NNUE was introduced and since then some parts of the code were left unused. This got to the point where asserts had to be made just to let people know that modifying something will not have any effects or may even break everything due to the assumptions being made. Removing these parts removes those nonexistent "false dependencies". Additionally: * append_changed_indices now takes the king pos and stateinfo explicitly, no more misleading pos parameter * IndexList is removed in favor of a generic ValueList. Feature transformer just instantiates the type it needs. * The update cost and refresh requirement is deferred to the feature set once again, but now doesn't go through the whole FeatureSet machinery and just calls HalfKP directly. * accumulator no longer has a singular dimension. * The PS constants and the PieceSquareIndex array are made local to the HalfKP feature set because they are specific to it and DO differ for other feature sets. 
* A few names are changed to more descriptive Passed STC non-regression: https://tests.stockfishchess.org/tests/view/608421dd95e7f1852abd2790 LLR: 2.95 (-2.94,2.94) <-2.50,0.50> Total: 180008 W: 16186 L: 16258 D: 147564 Ptnml(0-2): 587, 12593, 63725, 12503, 596 closes https://github.com/official-stockfish/Stockfish/pull/3441 No functional change --- src/misc.h | 43 ++++++++ src/nnue/architectures/halfkp_256x2-32-32.h | 54 ---------- src/nnue/features/feature_set.h | 69 ------------ src/nnue/features/features_common.h | 45 -------- src/nnue/features/half_kp.cpp | 65 ++++++------ src/nnue/features/half_kp.h | 75 ++++++++++--- src/nnue/features/index_list.h | 64 ------------ src/nnue/nnue_accumulator.h | 2 +- src/nnue/nnue_architecture.h | 30 +++++- src/nnue/nnue_common.h | 25 ----- src/nnue/nnue_feature_transformer.h | 110 ++++++++++---------- 11 files changed, 219 insertions(+), 363 deletions(-) delete mode 100644 src/nnue/architectures/halfkp_256x2-32-32.h delete mode 100644 src/nnue/features/feature_set.h delete mode 100644 src/nnue/features/features_common.h delete mode 100644 src/nnue/features/index_list.h diff --git a/src/misc.h b/src/misc.h index f834e470..59ca6e37 100644 --- a/src/misc.h +++ b/src/misc.h @@ -78,6 +78,49 @@ T* align_ptr_up(T* ptr) return reinterpret_cast(reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); } +template +class ValueListInserter { +public: + ValueListInserter(T* v, std::size_t& s) : + values(v), + size(&s) + { + } + + void push_back(const T& value) { values[(*size)++] = value; } +private: + T* values; + std::size_t* size; +}; + +template +class ValueList { + +public: + std::size_t size() const { return size_; } + void resize(std::size_t newSize) { size_ = newSize; } + void push_back(const T& value) { values_[size_++] = value; } + T& operator[](std::size_t index) { return values_[index]; } + T* begin() { return values_; } + T* end() { return values_ + size_; } + const T& operator[](std::size_t index) const { 
return values_[index]; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + operator ValueListInserter() { return ValueListInserter(values_, size_); } + + void swap(ValueList& other) { + const std::size_t maxSize = std::max(size_, other.size_); + for (std::size_t i = 0; i < maxSize; ++i) { + std::swap(values_[i], other.values_[i]); + } + std::swap(size_, other.size_); + } + +private: + T values_[MaxSize]; + std::size_t size_ = 0; +}; + /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated /// to the public domain by Sebastiano Vigna (2014). diff --git a/src/nnue/architectures/halfkp_256x2-32-32.h b/src/nnue/architectures/halfkp_256x2-32-32.h deleted file mode 100644 index 5f6cc7f3..00000000 --- a/src/nnue/architectures/halfkp_256x2-32-32.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// Definition of input features and network structure used in NNUE evaluation function - -#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED -#define NNUE_HALFKP_256X2_32_32_H_INCLUDED - -#include "../features/feature_set.h" -#include "../features/half_kp.h" - -#include "../layers/input_slice.h" -#include "../layers/affine_transform.h" -#include "../layers/clipped_relu.h" - -namespace Stockfish::Eval::NNUE { - -// Input features used in evaluation function -using RawFeatures = Features::FeatureSet< - Features::HalfKP>; - -// Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensions = 256; - -namespace Layers { - -// Define network structure -using InputLayer = InputSlice; -using HiddenLayer1 = ClippedReLU>; -using HiddenLayer2 = ClippedReLU>; -using OutputLayer = AffineTransform; - -} // namespace Layers - -using Network = Layers::OutputLayer; - -} // namespace Stockfish::Eval::NNUE - -#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h deleted file mode 100644 index d09f9b94..00000000 --- a/src/nnue/features/feature_set.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -// A class template that represents the input feature set of the NNUE evaluation function - -#ifndef NNUE_FEATURE_SET_H_INCLUDED -#define NNUE_FEATURE_SET_H_INCLUDED - -#include "features_common.h" -#include - -namespace Stockfish::Eval::NNUE::Features { - - // Class template that represents a list of values - template - struct CompileTimeList; - - template - struct CompileTimeList { - static constexpr bool Contains(T value) { - return value == First || CompileTimeList::Contains(value); - } - static constexpr std::array - Values = {{First, Remaining...}}; - }; - - // Base class of feature set - template - class FeatureSetBase { - - }; - - // Class template that represents the feature set - template - class FeatureSet : public FeatureSetBase> { - - public: - // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = FeatureType::HashValue; - // Number of feature dimensions - static constexpr IndexType Dimensions = FeatureType::Dimensions; - // Maximum number of simultaneously active features - static constexpr IndexType MaxActiveDimensions = - FeatureType::MaxActiveDimensions; - // Trigger for full calculation instead of difference calculation - using SortedTriggerSet = - CompileTimeList; - static constexpr auto RefreshTriggers = SortedTriggerSet::Values; - - }; - -} // namespace Stockfish::Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h deleted file mode 100644 index 9584cac8..00000000 --- a/src/nnue/features/features_common.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) 
any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -//Common header of input features of NNUE evaluation function - -#ifndef NNUE_FEATURES_COMMON_H_INCLUDED -#define NNUE_FEATURES_COMMON_H_INCLUDED - -#include "../../evaluate.h" -#include "../nnue_common.h" - -namespace Stockfish::Eval::NNUE::Features { - - class IndexList; - - template - class FeatureSet; - - // Trigger to perform full calculations instead of difference only - enum class TriggerEvent { - FriendKingMoved // calculate full evaluation when own king moves - }; - - enum class Side { - Friend // side to move - }; - -} // namespace Stockfish::Eval::NNUE::Features - -#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 5c7538de..aa1decee 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -19,69 +19,68 @@ //Definition of input features HalfKP of NNUE evaluation function #include "half_kp.h" -#include "index_list.h" + +#include "../../position.h" namespace Stockfish::Eval::NNUE::Features { // Orient a square according to perspective (rotates by 180 for black) - inline Square orient(Color perspective, Square s) { + inline Square HalfKP::orient(Color perspective, Square s) { return Square(int(s) ^ (bool(perspective) * 63)); } // Index of a feature for a given king position and another piece on some square - inline IndexType make_index(Color perspective, Square s, Piece pc, Square ksq) { + inline IndexType HalfKP::make_index(Color perspective, Square s, Piece pc, Square ksq) { return IndexType(orient(perspective, s) + PieceSquareIndex[perspective][pc] + PS_NB * ksq); } // Get a list 
of indices for active features - template - void HalfKP::append_active_indices( - const Position& pos, Color perspective, IndexList* active) { - + void HalfKP::append_active_indices( + const Position& pos, + Color perspective, + ValueListInserter active + ) { Square ksq = orient(perspective, pos.square(perspective)); Bitboard bb = pos.pieces() & ~pos.pieces(KING); while (bb) { Square s = pop_lsb(bb); - active->push_back(make_index(perspective, s, pos.piece_on(s), ksq)); + active.push_back(make_index(perspective, s, pos.piece_on(s), ksq)); } } // append_changed_indices() : get a list of indices for recently changed features - // IMPORTANT: The `pos` in this function is pretty much useless as it - // is not always the position the features are updated to. The feature - // transformer code right now can update multiple accumulators per move, - // but since Stockfish only keeps the full state of the current leaf - // search position it is not possible to always pass here the position for - // which the accumulator is being updated. Therefore the only thing that - // can be reliably extracted from `pos` is the king square for the king - // of the `perspective` color (note: not even the other king's square will - // match reality in all cases, this is also the reason why `dp` is passed - // as a parameter and not extracted from pos.state()). This is of particular - // problem for future nets with other feature sets, where updating the active - // feature might require more information from the intermediate positions. In - // this case the only easy solution is to remove the multiple updates from - // the feature transformer update code and only update the accumulator for - // the current leaf position (the position after the move). 
- - template - void HalfKP::append_changed_indices( - const Position& pos, const DirtyPiece& dp, Color perspective, - IndexList* removed, IndexList* added) { - - Square ksq = orient(perspective, pos.square(perspective)); + void HalfKP::append_changed_indices( + Square ksq, + StateInfo* st, + Color perspective, + ValueListInserter removed, + ValueListInserter added + ) { + const auto& dp = st->dirtyPiece; + Square oriented_ksq = orient(perspective, ksq); for (int i = 0; i < dp.dirty_num; ++i) { Piece pc = dp.piece[i]; if (type_of(pc) == KING) continue; if (dp.from[i] != SQ_NONE) - removed->push_back(make_index(perspective, dp.from[i], pc, ksq)); + removed.push_back(make_index(perspective, dp.from[i], pc, oriented_ksq)); if (dp.to[i] != SQ_NONE) - added->push_back(make_index(perspective, dp.to[i], pc, ksq)); + added.push_back(make_index(perspective, dp.to[i], pc, oriented_ksq)); } } - template class HalfKP; + int HalfKP::update_cost(StateInfo* st) { + return st->dirtyPiece.dirty_num; + } + + int HalfKP::refresh_cost(const Position& pos) { + return pos.count() - 2; + } + + bool HalfKP::requires_refresh(StateInfo* st, Color perspective) { + return st->dirtyPiece.piece[0] == make_piece(perspective, KING); + } } // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h index 14efb089..a09c221b 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_kp.h @@ -21,37 +21,88 @@ #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED #define NNUE_FEATURES_HALF_KP_H_INCLUDED +#include "../nnue_common.h" + #include "../../evaluate.h" -#include "features_common.h" +#include "../../misc.h" + +namespace Stockfish { + struct StateInfo; +} namespace Stockfish::Eval::NNUE::Features { // Feature HalfKP: Combination of the position of own king // and the position of pieces other than kings - template class HalfKP { + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * 
SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_NB = 10 * SQUARE_NB + 1 + }; + + static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE }, + { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE } + }; + + // Orient a square according to perspective (rotates by 180 for black) + static Square orient(Color perspective, Square s); + + // Index of a feature for a given king position and another piece on some square + static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq); + public: // Feature name static constexpr const char* Name = "HalfKP(Friend)"; + // Hash value embedded in the evaluation file - static constexpr std::uint32_t HashValue = - 0x5D69D5B9u ^ (AssociatedKing == Side::Friend); + static constexpr std::uint32_t HashValue = 0x5D69D5B8u; + // Number of feature dimensions static constexpr IndexType Dimensions = static_cast(SQUARE_NB) * static_cast(PS_NB); - // Maximum number of simultaneously active features - static constexpr IndexType MaxActiveDimensions = 30; // Kings don't count - // Trigger for full calculation instead of difference calculation - static constexpr TriggerEvent RefreshTrigger = TriggerEvent::FriendKingMoved; + + // Maximum number of simultaneously active features. 30 because kings are not included. 
+ static constexpr IndexType MaxActiveDimensions = 30; // Get a list of indices for active features - static void append_active_indices(const Position& pos, Color perspective, - IndexList* active); + static void append_active_indices( + const Position& pos, + Color perspective, + ValueListInserter active); // Get a list of indices for recently changed features - static void append_changed_indices(const Position& pos, const DirtyPiece& dp, Color perspective, - IndexList* removed, IndexList* added); + static void append_changed_indices( + Square ksq, + StateInfo* st, + Color perspective, + ValueListInserter removed, + ValueListInserter added); + + // Returns the cost of updating one perspective, the most costly one. + // Assumes no refresh needed. + static int update_cost(StateInfo* st); + static int refresh_cost(const Position& pos); + + // Returns whether the change stored in this StateInfo means that + // a full accumulator refresh is required. + static bool requires_refresh(StateInfo* st, Color perspective); }; } // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h deleted file mode 100644 index edf0add1..00000000 --- a/src/nnue/features/index_list.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) - - Stockfish is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Stockfish is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see . -*/ - -// Definition of index list of input features - -#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED -#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED - -#include "../../position.h" -#include "../nnue_architecture.h" - -namespace Stockfish::Eval::NNUE::Features { - - // Class template used for feature index list - template - class ValueList { - - public: - std::size_t size() const { return size_; } - void resize(std::size_t size) { size_ = size; } - void push_back(const T& value) { values_[size_++] = value; } - T& operator[](std::size_t index) { return values_[index]; } - T* begin() { return values_; } - T* end() { return values_ + size_; } - const T& operator[](std::size_t index) const { return values_[index]; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } - - void swap(ValueList& other) { - const std::size_t max_size = std::max(size_, other.size_); - for (std::size_t i = 0; i < max_size; ++i) { - std::swap(values_[i], other.values_[i]); - } - std::swap(size_, other.size_); - } - - private: - T values_[MaxSize]; - std::size_t size_ = 0; - }; - - //Type of feature index list - class IndexList - : public ValueList { - }; - -} // namespace Stockfish::Eval::NNUE::Features - -#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index aeb5f2bd..72a151f8 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -31,7 +31,7 @@ namespace Stockfish::Eval::NNUE { // Class that holds the result of affine transformation of input features struct alignas(CacheLineSize) Accumulator { std::int16_t - accumulation[2][RefreshTriggers.size()][TransformedFeatureDimensions]; + accumulation[2][TransformedFeatureDimensions]; AccumulatorState state[2]; }; diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index f59474df..55a01fbe 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -21,18 +21,38 @@ 
#ifndef NNUE_ARCHITECTURE_H_INCLUDED #define NNUE_ARCHITECTURE_H_INCLUDED -// Defines the network structure -#include "architectures/halfkp_256x2-32-32.h" +#include "nnue_common.h" + +#include "features/half_kp.h" + +#include "layers/input_slice.h" +#include "layers/affine_transform.h" +#include "layers/clipped_relu.h" namespace Stockfish::Eval::NNUE { + // Input features used in evaluation function + using FeatureSet = Features::HalfKP; + + // Number of input feature dimensions after conversion + constexpr IndexType TransformedFeatureDimensions = 256; + + namespace Layers { + + // Define network structure + using InputLayer = InputSlice; + using HiddenLayer1 = ClippedReLU>; + using HiddenLayer2 = ClippedReLU>; + using OutputLayer = AffineTransform; + + } // namespace Layers + + using Network = Layers::OutputLayer; + static_assert(TransformedFeatureDimensions % MaxSimdWidth == 0, ""); static_assert(Network::OutputDimensions == 1, ""); static_assert(std::is_same::value, ""); - // Trigger for full calculation instead of difference calculation - constexpr auto RefreshTriggers = RawFeatures::RefreshTriggers; - } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 20eb27d4..8c54f9ba 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -71,31 +71,6 @@ namespace Stockfish::Eval::NNUE { constexpr std::size_t MaxSimdWidth = 32; - // unique number for each piece type on each square - enum { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_NB = 10 * SQUARE_NB + 1 - }; - - constexpr uint32_t PieceSquareIndex[COLOR_NB][PIECE_NB] = { - // convention: W - us, B - them - // viewed 
from other side, W and B are reversed - { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE }, - { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_NONE, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_NONE, PS_NONE } - }; - // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; using IndexType = std::uint32_t; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index de4b4937..f4412749 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -23,7 +23,8 @@ #include "nnue_common.h" #include "nnue_architecture.h" -#include "features/index_list.h" + +#include "../misc.h" #include // std::memset() @@ -96,7 +97,7 @@ namespace Stockfish::Eval::NNUE { using OutputType = TransformedFeatureType; // Number of input/output dimensions - static constexpr IndexType InputDimensions = RawFeatures::Dimensions; + static constexpr IndexType InputDimensions = FeatureSet::Dimensions; static constexpr IndexType OutputDimensions = HalfDimensions * 2; // Size of forward propagation buffer @@ -105,7 +106,7 @@ namespace Stockfish::Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { - return RawFeatures::HashValue ^ OutputDimensions; + return FeatureSet::HashValue ^ OutputDimensions; } // Read network parameters @@ -161,9 +162,9 @@ namespace Stockfish::Eval::NNUE { auto out = reinterpret_cast<__m512i*>(&output[offset]); for (IndexType j = 0; j < NumChunks; ++j) { __m512i sum0 = _mm512_load_si512( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + &reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 0]); __m512i sum1 = _mm512_load_si512( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + 
&reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 1]); _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control, _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero))); } @@ -172,9 +173,9 @@ namespace Stockfish::Eval::NNUE { auto out = reinterpret_cast<__m256i*>(&output[offset]); for (IndexType j = 0; j < NumChunks; ++j) { __m256i sum0 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + &reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 0]); __m256i sum1 = _mm256_load_si256( - &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + &reinterpret_cast(accumulation[perspectives[p]])[j * 2 + 1]); _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), Zero), Control)); } @@ -183,9 +184,9 @@ namespace Stockfish::Eval::NNUE { auto out = reinterpret_cast<__m128i*>(&output[offset]); for (IndexType j = 0; j < NumChunks; ++j) { __m128i sum0 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); + accumulation[perspectives[p]])[j * 2 + 0]); __m128i sum1 = _mm_load_si128(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); + accumulation[perspectives[p]])[j * 2 + 1]); const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); _mm_store_si128(&out[j], @@ -203,9 +204,9 @@ namespace Stockfish::Eval::NNUE { auto out = reinterpret_cast<__m64*>(&output[offset]); for (IndexType j = 0; j < NumChunks; ++j) { __m64 sum0 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); + accumulation[perspectives[p]])[j * 2 + 0]); __m64 sum1 = *(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); + accumulation[perspectives[p]])[j * 2 + 1]); const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); } @@ -214,13 +215,13 @@ namespace Stockfish::Eval::NNUE { const auto out = reinterpret_cast(&output[offset]); for (IndexType j = 0; j < NumChunks; ++j) { 
int16x8_t sum = reinterpret_cast( - accumulation[perspectives[p]][0])[j]; + accumulation[perspectives[p]])[j]; out[j] = vmax_s8(vqmovn_s16(sum), Zero); } #else for (IndexType j = 0; j < HalfDimensions; ++j) { - BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + BiasType sum = accumulation[static_cast(perspectives[p])][j]; output[offset + j] = static_cast( std::max(0, std::min(127, sum))); } @@ -233,7 +234,13 @@ namespace Stockfish::Eval::NNUE { } private: - void update_accumulator(const Position& pos, const Color c) const { + void update_accumulator(const Position& pos, const Color perspective) const { + + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + using IndexList = ValueList; #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array @@ -244,23 +251,19 @@ namespace Stockfish::Eval::NNUE { // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. StateInfo *st = pos.state(), *next = nullptr; - int gain = pos.count() - 2; - while (st->accumulator.state[c] == EMPTY) + int gain = FeatureSet::refresh_cost(pos); + while (st->accumulator.state[perspective] == EMPTY) { - auto& dp = st->dirtyPiece; - // The first condition tests whether an incremental update is - // possible at all: if this side's king has moved, it is not possible. - static_assert(std::is_same_v>, - "Current code assumes that only FriendlyKingMoved refresh trigger is being used."); - if ( dp.piece[0] == make_piece(c, KING) - || (gain -= dp.dirty_num + 1) < 0) + // This governs when a full feature refresh is needed and how many + // updates are better than just one full refresh. 
+ if ( FeatureSet::requires_refresh(st, perspective) + || (gain -= FeatureSet::update_cost(st) + 1) < 0) break; next = st; st = st->previous; } - if (st->accumulator.state[c] == COMPUTED) + if (st->accumulator.state[perspective] == COMPUTED) { if (next == nullptr) return; @@ -268,34 +271,32 @@ namespace Stockfish::Eval::NNUE { // Update incrementally in two steps. First, we update the "next" // accumulator. Then, we update the current accumulator (pos.state()). - // Gather all features to be updated. This code assumes HalfKP features - // only and doesn't support refresh triggers. - static_assert(std::is_same_v>, - RawFeatures>); - Features::IndexList removed[2], added[2]; - Features::HalfKP::append_changed_indices(pos, - next->dirtyPiece, c, &removed[0], &added[0]); + // Gather all features to be updated. + const Square ksq = pos.square(perspective); + IndexList removed[2], added[2]; + FeatureSet::append_changed_indices( + ksq, next, perspective, removed[0], added[0]); for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) - Features::HalfKP::append_changed_indices(pos, - st2->dirtyPiece, c, &removed[1], &added[1]); + FeatureSet::append_changed_indices( + ksq, st2, perspective, removed[1], added[1]); // Mark the accumulators as computed. - next->accumulator.state[c] = COMPUTED; - pos.state()->accumulator.state[c] = COMPUTED; + next->accumulator.state[perspective] = COMPUTED; + pos.state()->accumulator.state[perspective] = COMPUTED; - // Now update the accumulators listed in info[], where the last element is a sentinel. - StateInfo *info[3] = + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + StateInfo *states_to_update[3] = { next, next == pos.state() ? 
nullptr : pos.state(), nullptr }; #ifdef VECTOR for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { // Load accumulator auto accTile = reinterpret_cast( - &st->accumulator.accumulation[c][0][j * TileHeight]); + &st->accumulator.accumulation[perspective][j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_load(&accTile[k]); - for (IndexType i = 0; info[i]; ++i) + for (IndexType i = 0; states_to_update[i]; ++i) { // Difference calculation for the deactivated features for (const auto index : removed[i]) @@ -317,19 +318,19 @@ namespace Stockfish::Eval::NNUE { // Store accumulator accTile = reinterpret_cast( - &info[i]->accumulator.accumulation[c][0][j * TileHeight]); + &states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) vec_store(&accTile[k], acc[k]); } } #else - for (IndexType i = 0; info[i]; ++i) + for (IndexType i = 0; states_to_update[i]; ++i) { - std::memcpy(info[i]->accumulator.accumulation[c][0], - st->accumulator.accumulation[c][0], + std::memcpy(states_to_update[i]->accumulator.accumulation[perspective], + st->accumulator.accumulation[perspective], HalfDimensions * sizeof(BiasType)); - st = info[i]; + st = states_to_update[i]; // Difference calculation for the deactivated features for (const auto index : removed[i]) @@ -337,7 +338,7 @@ namespace Stockfish::Eval::NNUE { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[c][0][j] -= weights[offset + j]; + st->accumulator.accumulation[perspective][j] -= weights[offset + j]; } // Difference calculation for the activated features @@ -346,7 +347,7 @@ namespace Stockfish::Eval::NNUE { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[c][0][j] += weights[offset + j]; + st->accumulator.accumulation[perspective][j] += weights[offset + j]; } } #endif @@ -355,9 +356,9 @@ 
namespace Stockfish::Eval::NNUE { { // Refresh the accumulator auto& accumulator = pos.state()->accumulator; - accumulator.state[c] = COMPUTED; - Features::IndexList active; - Features::HalfKP::append_active_indices(pos, c, &active); + accumulator.state[perspective] = COMPUTED; + IndexList active; + FeatureSet::append_active_indices(pos, perspective, active); #ifdef VECTOR for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) @@ -377,13 +378,13 @@ namespace Stockfish::Eval::NNUE { } auto accTile = reinterpret_cast( - &accumulator.accumulation[c][0][j * TileHeight]); + &accumulator.accumulation[perspective][j * TileHeight]); for (unsigned k = 0; k < NumRegs; k++) vec_store(&accTile[k], acc[k]); } #else - std::memcpy(accumulator.accumulation[c][0], biases, + std::memcpy(accumulator.accumulation[perspective], biases, HalfDimensions * sizeof(BiasType)); for (const auto index : active) @@ -391,7 +392,7 @@ namespace Stockfish::Eval::NNUE { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[c][0][j] += weights[offset + j]; + accumulator.accumulation[perspective][j] += weights[offset + j]; } #endif } @@ -405,8 +406,7 @@ namespace Stockfish::Eval::NNUE { using WeightType = std::int16_t; alignas(CacheLineSize) BiasType biases[HalfDimensions]; - alignas(CacheLineSize) - WeightType weights[HalfDimensions * InputDimensions]; + alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; }; } // namespace Stockfish::Eval::NNUE -- 2.39.2