X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Fnnue_common.h;h=f9ff2bc81ee8f919903fef0f37217379a1304262;hp=e7ce84f7b9f420eaaa3dbadb4302eeb242315c4f;hb=9b7983a4521b66bf8d3c37ee58963d39deb2695c;hpb=875183b310a8249922c2155e82cb4cecfae2097e

Summary of this patch to src/nnue/nnue_common.h:
  * include <cstring> and <iostream> (used by the new read_little_endian());
  * add a USE_MMX branch to the intrinsics includes and to kSimdWidth (8 bytes);
  * remove the _mm256_loadA/_mm256_storeA/_mm512_loadA alias macros;
  * add the PS_* piece-square enum and the kpp_board_index lookup table;
  * re-indent the body of CeilToMultiple() (whitespace-only change);
  * add read_little_endian(), which reads an integer stored in little-endian
    byte order from a stream.

diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
index e7ce84f7..f9ff2bc8 100644
--- a/src/nnue/nnue_common.h
+++ b/src/nnue/nnue_common.h
@@ -21,6 +21,9 @@
 #ifndef NNUE_COMMON_H_INCLUDED
 #define NNUE_COMMON_H_INCLUDED
 
+#include <cstring>
+#include <iostream>
+
 #if defined(USE_AVX2)
 #include <immintrin.h>
 
@@ -33,31 +36,13 @@
 #elif defined(USE_SSE2)
 #include <emmintrin.h>
 
+#elif defined(USE_MMX)
+#include <mmintrin.h>
+
 #elif defined(USE_NEON)
 #include <arm_neon.h>
 #endif
 
-// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
-//       compiled with older g++ crashes because the output memory is not aligned
-//       even though alignas is specified.
-#if defined(USE_AVX2)
-#if defined(__GNUC__ ) && (__GNUC__ < 9)
-#define _mm256_loadA_si256 _mm256_loadu_si256
-#define _mm256_storeA_si256 _mm256_storeu_si256
-#else
-#define _mm256_loadA_si256 _mm256_load_si256
-#define _mm256_storeA_si256 _mm256_store_si256
-#endif
-#endif
-
-#if defined(USE_AVX512)
-#if defined(__GNUC__ ) && (__GNUC__ < 9)
-#define _mm512_loadA_si512 _mm512_loadu_si512
-#else
-#define _mm512_loadA_si512 _mm512_load_si512
-#endif
-#endif
-
 namespace Eval::NNUE {
 
   // Version of the evaluation file
@@ -77,12 +62,43 @@ namespace Eval::NNUE {
   #elif defined(USE_SSE2)
   constexpr std::size_t kSimdWidth = 16;
 
+  #elif defined(USE_MMX)
+  constexpr std::size_t kSimdWidth = 8;
+
   #elif defined(USE_NEON)
   constexpr std::size_t kSimdWidth = 16;
   #endif
 
   constexpr std::size_t kMaxSimdWidth = 32;
 
+  // unique number for each piece type on each square
+  enum {
+    PS_NONE     =  0,
+    PS_W_PAWN   =  1,
+    PS_B_PAWN   =  1 * SQUARE_NB + 1,
+    PS_W_KNIGHT =  2 * SQUARE_NB + 1,
+    PS_B_KNIGHT =  3 * SQUARE_NB + 1,
+    PS_W_BISHOP =  4 * SQUARE_NB + 1,
+    PS_B_BISHOP =  5 * SQUARE_NB + 1,
+    PS_W_ROOK   =  6 * SQUARE_NB + 1,
+    PS_B_ROOK   =  7 * SQUARE_NB + 1,
+    PS_W_QUEEN  =  8 * SQUARE_NB + 1,
+    PS_B_QUEEN  =  9 * SQUARE_NB + 1,
+    PS_W_KING   = 10 * SQUARE_NB + 1,
+    PS_END      = PS_W_KING, // pieces without kings (pawns included)
+    PS_B_KING   = 11 * SQUARE_NB + 1,
+    PS_END2     = 12 * SQUARE_NB + 1
+  };
+
+  constexpr uint32_t kpp_board_index[COLOR_NB][PIECE_NB] = {
+    // convention: W - us, B - them
+    // viewed from other side, W and B are reversed
+    { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE,
+      PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE },
+    { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE,
+      PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE }
+  };
+
   // Type of input feature after conversion
   using TransformedFeatureType = std::uint8_t;
   using IndexType = std::uint32_t;
@@ -90,7 +106,25 @@ namespace Eval::NNUE {
   // Round n up to be a multiple of base
   template <typename IntType>
   constexpr IntType CeilToMultiple(IntType n, IntType base) {
-    return (n + base - 1) / base * base;
+      return (n + base - 1) / base * base;
+  }
+
+  // read_little_endian() is our utility to read an integer (signed or unsigned, any size)
+  // from a stream in little-endian order. We swap the byte order after the read if
+  // necessary to return a result with the byte ordering of the compiling machine.
+  template <typename IntType>
+  inline IntType read_little_endian(std::istream& stream) {
+
+      IntType result;
+      std::uint8_t u[sizeof(IntType)];
+      typename std::make_unsigned<IntType>::type v = 0;
+
+      stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
+      for (std::size_t i = 0; i < sizeof(IntType); ++i)
+          v = (v << 8) | u[sizeof(IntType) - i - 1];
+
+      std::memcpy(&result, &v, sizeof(IntType));
+      return result;
   }
 
 }  // namespace Eval::NNUE