X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Fevaluate_nnue.cpp;h=e7339c10ae982be1200b0c519ecfab9c673eb074;hb=a069a1bbbfb60abddbe3fe5276b06f35f783f41c;hp=3aa85943944c41b25ed33a918d547dddec6eaad8;hpb=72dc7a5c54554a8c7c4bf68aa7de2d4de05f3294;p=stockfish diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 3aa85943..e7339c10 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,158 +18,412 @@ // Code for calculating NNUE evaluation function +#include "evaluate_nnue.h" + +#include +#include +#include #include +#include #include -#include +#include +#include #include "../evaluate.h" -#include "../position.h" #include "../misc.h" +#include "../position.h" +#include "../types.h" #include "../uci.h" +#include "nnue_accumulator.h" +#include "nnue_common.h" -#include "evaluate_nnue.h" - -ExtPieceSquare kpp_board_index[PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_NONE }, - { PS_W_PAWN, PS_B_PAWN }, - { PS_W_KNIGHT, PS_B_KNIGHT }, - { PS_W_BISHOP, PS_B_BISHOP }, - { PS_W_ROOK, PS_B_ROOK }, - { PS_W_QUEEN, PS_B_QUEEN }, - { PS_W_KING, PS_B_KING }, - { PS_NONE, PS_NONE }, - { PS_NONE, PS_NONE }, - { PS_B_PAWN, PS_W_PAWN }, - { PS_B_KNIGHT, PS_W_KNIGHT }, - { PS_B_BISHOP, PS_W_BISHOP }, - { PS_B_ROOK, PS_W_ROOK }, - { PS_B_QUEEN, PS_W_QUEEN }, - { PS_B_KING, PS_W_KING }, - { PS_NONE, PS_NONE } -}; +namespace Stockfish::Eval::NNUE { +// Input feature converter +LargePagePtr featureTransformer; -namespace Eval::NNUE { +// Evaluation function +AlignedPtr network[LayerStacks]; - // Input feature converter - AlignedPtr feature_transformer; +// Evaluation function file name +std::string fileName; +std::string netDescription; - // Evaluation function - AlignedPtr network; +namespace Detail { - // Evaluation function file name - std::string fileName; +// Initialize the evaluation function parameters +template +void initialize(AlignedPtr& pointer) { - namespace Detail { + pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); +} - // Initialize the evaluation function parameters - template - void Initialize(AlignedPtr& pointer) { +template +void initialize(LargePagePtr& pointer) { - pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + static_assert(alignof(T) <= 4096, + "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); std::memset(pointer.get(), 0, sizeof(T)); - } +} - // Read evaluation function parameters - template - bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { +// Read evaluation function parameters +template +bool read_parameters(std::istream& stream, T& reference) { std::uint32_t header; - header = read_le(stream); - if (!stream || header != T::GetHashValue()) return false; - return pointer->ReadParameters(stream); - } + header = read_little_endian(stream); + if (!stream || header != T::get_hash_value()) + return false; + return reference.read_parameters(stream); +} + +// Write evaluation function parameters +template +bool write_parameters(std::ostream& stream, const T& reference) { - } // namespace Detail + write_little_endian(stream, T::get_hash_value()); + return reference.write_parameters(stream); +} - // Initialize the evaluation function parameters - void Initialize() { +} // namespace Detail - Detail::Initialize(feature_transformer); - Detail::Initialize(network); - } - // Read network header - bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture) { +// Initialize the evaluation function parameters +static void initialize() { + Detail::initialize(featureTransformer); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(network[i]); +} + +// Read network header +static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) { std::uint32_t version, size; - version = read_le(stream); - *hash_value = read_le(stream); - size = read_le(stream); - if (!stream || version != kVersion) return false; - architecture->resize(size); - stream.read(&(*architecture)[0], size); + + version = read_little_endian(stream); + *hashValue = read_little_endian(stream); + size = read_little_endian(stream); + if (!stream || version != Version) + return false; + desc->resize(size); + stream.read(&(*desc)[0], size); return !stream.fail(); - } +} + +// Write network header +static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) { + write_little_endian(stream, Version); + write_little_endian(stream, hashValue); + write_little_endian(stream, std::uint32_t(desc.size())); + stream.write(&desc[0], desc.size()); + return !stream.fail(); +} + +// Read network parameters +static bool read_parameters(std::istream& stream) { + + std::uint32_t hashValue; + if (!read_header(stream, &hashValue, &netDescription)) + return false; + if (hashValue != HashValue) + return false; + if (!Detail::read_parameters(stream, *featureTransformer)) + return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::read_parameters(stream, *(network[i]))) + return false; + return stream && stream.peek() == std::ios::traits_type::eof(); +} + +// Write network parameters +static bool write_parameters(std::ostream& stream) { + + if (!write_header(stream, HashValue, netDescription)) + return false; + if (!Detail::write_parameters(stream, *featureTransformer)) + return false; + for (std::size_t i = 0; i < LayerStacks; ++i) + if (!Detail::write_parameters(stream, *(network[i]))) + return false; + return bool(stream); +} + +void hint_common_parent_position(const Position& pos) { + featureTransformer->hint_common_access(pos); +} + +// Evaluation function. Perform differential calculation. +Value evaluate(const Position& pos, bool adjusted, int* complexity) { + + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + + constexpr uint64_t alignment = CacheLineSize; + constexpr int delta = 24; + +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; + + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; +#endif + + ASSERT_ALIGNED(transformedFeatures, alignment); + + const int bucket = (pos.count() - 1) / 4; + const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + if (complexity) + *complexity = std::abs(psqt - positional) / OutputScale; + + // Give more value to positional evaluation when adjusted flag is set + if (adjusted) + return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) + / (1024 * OutputScale)); + else + return static_cast((psqt + positional) / OutputScale); +} + +struct NnueEvalTrace { + static_assert(LayerStacks == PSQTBuckets); + + Value psqt[LayerStacks]; + Value positional[LayerStacks]; + std::size_t correctBucket; +}; - // Read network parameters - bool ReadParameters(std::istream& stream) { +static NnueEvalTrace trace_evaluate(const Position& pos) { - std::uint32_t hash_value; - std::string architecture; - if (!ReadHeader(stream, &hash_value, &architecture)) return false; - if (hash_value != kHashValue) return false; - if (!Detail::ReadParameters(stream, feature_transformer)) return false; - if (!Detail::ReadParameters(stream, network)) return false; - return stream && stream.peek() == std::ios::traits_type::eof(); - } + // We manually align the arrays on the stack because with gcc < 9.3 + // overaligning stack variables with alignas() doesn't work correctly. + constexpr uint64_t alignment = CacheLineSize; - // Proceed with the difference calculation if possible - static void UpdateAccumulatorIfPossible(const Position& pos) { +#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; - feature_transformer->UpdateAccumulatorIfPossible(pos); - } + auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); +#else + alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; +#endif - // Calculate the evaluation value - static Value ComputeScore(const Position& pos, bool refresh) { + ASSERT_ALIGNED(transformedFeatures, alignment); - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; + NnueEvalTrace t{}; + t.correctBucket = (pos.count() - 1) / 4; + for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) + { + const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); + + t.psqt[bucket] = static_cast(materialist / OutputScale); + t.positional[bucket] = static_cast(positional / OutputScale); } - alignas(kCacheLineSize) TransformedFeatureType - transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); - alignas(kCacheLineSize) char buffer[Network::kBufferSize]; - const auto output = network->Propagate(transformed_features, buffer); + return t; +} + +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); + + +// Converts a Value into (centi)pawns and writes it in a buffer. +// The buffer must have capacity for at least 5 chars. +static void format_cp_compact(Value v, char* buffer) { + + buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - auto score = static_cast(output[0] / FV_SCALE); + int cp = std::abs(UCI::to_cp(v)); + if (cp >= 10000) + { + buffer[1] = '0' + cp / 10000; + cp %= 10000; + buffer[2] = '0' + cp / 1000; + cp %= 1000; + buffer[3] = '0' + cp / 100; + buffer[4] = ' '; + } + else if (cp >= 1000) + { + buffer[1] = '0' + cp / 1000; + cp %= 1000; + buffer[2] = '0' + cp / 100; + cp %= 100; + buffer[3] = '.'; + buffer[4] = '0' + cp / 10; + } + else + { + buffer[1] = '0' + cp / 100; + cp %= 100; + buffer[2] = '.'; + buffer[3] = '0' + cp / 10; + cp %= 10; + buffer[4] = '0' + cp / 1; + } +} + + +// Converts a Value into pawns, always keeping two decimals +static void format_cp_aligned_dot(Value v, std::stringstream& stream) { + + const double pawns = std::abs(0.01 * UCI::to_cp(v)); + + stream << (v < 0 ? '-' + : v > 0 ? '+' + : ' ') + << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns; +} + + +// Returns a string with the value of each piece on a board, +// and a table for (PSQT, Layers) values bucket by bucket. +std::string trace(Position& pos) { + + std::stringstream ss; + + char board[3 * 8 + 1][8 * 8 + 2]; + std::memset(board, ' ', sizeof(board)); + for (int row = 0; row < 3 * 8 + 1; ++row) + board[row][8 * 8 + 1] = '\0'; + + // A lambda to output one box of the board + auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) { + const int x = int(file) * 8; + const int y = (7 - int(rank)) * 3; + for (int i = 1; i < 8; ++i) + board[y][x + i] = board[y + 3][x + i] = '-'; + for (int i = 1; i < 3; ++i) + board[y + i][x] = board[y + i][x + 8] = '|'; + board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; + if (pc != NO_PIECE) + board[y + 1][x + 4] = PieceToChar[pc]; + if (value != VALUE_NONE) + format_cp_compact(value, &board[y + 2][x + 2]); + }; + + // We estimate the value of each piece by doing a differential evaluation from + // the current base eval, simulating the removal of the piece from its square. + Value base = evaluate(pos); + base = pos.side_to_move() == WHITE ? base : -base; + + for (File f = FILE_A; f <= FILE_H; ++f) + for (Rank r = RANK_1; r <= RANK_8; ++r) + { + Square sq = make_square(f, r); + Piece pc = pos.piece_on(sq); + Value v = VALUE_NONE; + + if (pc != NO_PIECE && type_of(pc) != KING) + { + auto st = pos.state(); + + pos.remove_piece(sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + + Value eval = evaluate(pos); + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; + + pos.put_piece(pc, sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + } + + writeSquare(f, r, pc, v); + } + + ss << " NNUE derived piece values:\n"; + for (int row = 0; row < 3 * 8 + 1; ++row) + ss << board[row] << '\n'; + ss << '\n'; + + auto t = trace_evaluate(pos); + + ss << " NNUE network contributions " + << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl + << "+------------+------------+------------+------------+\n" + << "| Bucket | Material | Positional | Total |\n" + << "| | (PSQT) | (Layers) | |\n" + << "+------------+------------+------------+------------+\n"; + + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) + { + ss << "| " << bucket << " "; + ss << " | "; + format_cp_aligned_dot(t.psqt[bucket], ss); + ss << " " + << " | "; + format_cp_aligned_dot(t.positional[bucket], ss); + ss << " " + << " | "; + format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); + ss << " " + << " |"; + if (bucket == t.correctBucket) + ss << " <-- this bucket is used"; + ss << '\n'; + } - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; - } + ss << "+------------+------------+------------+------------+\n"; - // Load the evaluation function file - bool load_eval_file(const std::string& evalFile) { + return ss.str(); +} - Initialize(); - fileName = evalFile; - std::ifstream stream(evalFile, std::ios::binary); +// Load eval, from a file stream or a memory stream +bool load_eval(std::string name, std::istream& stream) { - const bool result = ReadParameters(stream); + initialize(); + fileName = name; + return read_parameters(stream); +} + +// Save eval, to a file stream or a memory stream +bool save_eval(std::ostream& stream) { + + if (fileName.empty()) + return false; + + return write_parameters(stream); +} + +// Save eval, to a file given by its name +bool save_eval(const std::optional& filename) { + + std::string actualFilename; + std::string msg; + + if (filename.has_value()) + actualFilename = filename.value(); + else + { + if (currentEvalFileName != EvalFileDefaultName) + { + msg = "Failed to export a net. " + "A non-embedded net can only be saved if the filename is specified"; + + sync_cout << msg << sync_endl; + return false; + } + actualFilename = EvalFileDefaultName; + } - return result; - } + std::ofstream stream(actualFilename, std::ios_base::binary); + bool saved = save_eval(stream); - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - return ComputeScore(pos, false); - } + msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net"; - // Evaluation function. Perform full calculation. - Value compute_eval(const Position& pos) { - return ComputeScore(pos, true); - } + sync_cout << msg << sync_endl; + return saved; +} - // Proceed with the difference calculation if possible - void update_eval(const Position& pos) { - UpdateAccumulatorIfPossible(pos); - } -} // namespace Eval::NNUE +} // namespace Stockfish::Eval::NNUE