X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Fnnue_common.h;h=74eaae17c2dadecb804f169b204984fe42cc3adb;hp=7bc905dc751c66aaefa8f748cf90df9160a3b344;hb=4b86ef8c4f8755850b38f2eca026cb9da20c4d01;hpb=9b4967071e2fb116673820127522bc43d01d2257 diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 7bc905dc..74eaae17 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,8 @@ #include #include +#include "../misc.h" // for IsLittleEndian + #if defined(USE_AVX2) #include @@ -43,77 +45,33 @@ #include #endif -// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary -// compiled with older g++ crashes because the output memory is not aligned -// even though alignas is specified. -#if defined(USE_AVX2) -#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) -#define _mm256_loadA_si256 _mm256_loadu_si256 -#define _mm256_storeA_si256 _mm256_storeu_si256 -#else -#define _mm256_loadA_si256 _mm256_load_si256 -#define _mm256_storeA_si256 _mm256_store_si256 -#endif -#endif - -#if defined(USE_AVX512) -#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) -#define _mm512_loadA_si512 _mm512_loadu_si512 -#define _mm512_storeA_si512 _mm512_storeu_si512 -#else -#define _mm512_loadA_si512 _mm512_load_si512 -#define _mm512_storeA_si512 _mm512_store_si512 -#endif -#endif - -namespace Eval::NNUE { +namespace Stockfish::Eval::NNUE { // Version of the evaluation file - constexpr std::uint32_t kVersion = 0x7AF32F16u; + constexpr std::uint32_t Version = 0x7AF32F20u; // Constant used in evaluation value calculation - constexpr int FV_SCALE = 16; - constexpr int kWeightScaleBits = 6; + constexpr int OutputScale = 16; + constexpr int WeightScaleBits = 6; // Size of cache line (in bytes) - constexpr std::size_t kCacheLineSize = 64; + constexpr std::size_t CacheLineSize = 64; // SIMD width (in bytes) #if defined(USE_AVX2) - constexpr std::size_t kSimdWidth = 32; + constexpr std::size_t SimdWidth = 32; #elif defined(USE_SSE2) - constexpr std::size_t kSimdWidth = 16; + constexpr std::size_t SimdWidth = 16; #elif defined(USE_MMX) - constexpr std::size_t kSimdWidth = 8; + constexpr std::size_t SimdWidth = 8; #elif defined(USE_NEON) - constexpr std::size_t kSimdWidth = 16; + constexpr std::size_t SimdWidth = 16; #endif - constexpr std::size_t kMaxSimdWidth = 32; - - // unique number for each piece type on each square - enum { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1 - }; - - extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + constexpr std::size_t MaxSimdWidth = 32; // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; @@ -121,7 +79,7 @@ namespace Eval::NNUE { // Round n up to be a multiple of base template - constexpr IntType CeilToMultiple(IntType n, IntType base) { + constexpr IntType ceil_to_multiple(IntType n, IntType base) { return (n + base - 1) / base * base; } @@ -130,19 +88,77 @@ namespace Eval::NNUE { // necessary to return a result with the byte ordering of the compiling machine. template inline IntType read_little_endian(std::istream& stream) { - IntType result; - std::uint8_t u[sizeof(IntType)]; - typename std::make_unsigned::type v = 0; - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; + if (IsLittleEndian) + stream.read(reinterpret_cast(&result), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + } - std::memcpy(&result, &v, sizeof(IntType)); return result; } -} // namespace Eval::NNUE + // write_little_endian() is our utility to write an integer (signed or unsigned, any size) + // to a stream in little-endian order. We swap the byte order before the write if + // necessary to always write in little endian order, independently of the byte + // ordering of the compiling machine. + template + inline void write_little_endian(std::ostream& stream, IntType value) { + + if (IsLittleEndian) + stream.write(reinterpret_cast(&value), sizeof(IntType)); + else + { + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = value; + + std::size_t i = 0; + // if constexpr to silence the warning about shift by 8 + if constexpr (sizeof(IntType) > 1) + { + for (; i + 1 < sizeof(IntType); ++i) + { + u[i] = v; + v >>= 8; + } + } + u[i] = v; + + stream.write(reinterpret_cast(u), sizeof(IntType)); + } + } + + // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. + // This reads N integers from stream s and put them in array out. + template + inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { + if (IsLittleEndian) + stream.read(reinterpret_cast(out), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + out[i] = read_little_endian(stream); + } + + // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. + // This takes N integers from array values and writes them on stream s. + template + inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { + if (IsLittleEndian) + stream.write(reinterpret_cast(values), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + write_little_endian(stream, values[i]); + } + +} // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED