X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Fnnue_common.h;h=61f18aeec848be9b25edde2b68733e97c8cf254b;hb=72dc7a5c54554a8c7c4bf68aa7de2d4de05f3294;hp=972ef3e50c66291cb50039741e5a24f0c91ba6a1;hpb=84f3e867903f62480c33243dd0ecbffd342796fc;p=stockfish

diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
index 972ef3e5..61f18aee 100644
--- a/src/nnue/nnue_common.h
+++ b/src/nnue/nnue_common.h
@@ -21,6 +21,9 @@
 #ifndef NNUE_COMMON_H_INCLUDED
 #define NNUE_COMMON_H_INCLUDED
 
+#include <cstring>
+#include <iostream>
+
 #if defined(USE_AVX2)
 #include <immintrin.h>
 
@@ -33,10 +36,36 @@
 #elif defined(USE_SSE2)
 #include <emmintrin.h>
 
+#elif defined(USE_MMX)
+#include <mmintrin.h>
+
 #elif defined(USE_NEON)
 #include <arm_neon.h>
 #endif
 
+// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
+//       compiled with older g++ crashes because the output memory is not aligned
+//       even though alignas is specified.
+#if defined(USE_AVX2)
+#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
+#define _mm256_loadA_si256  _mm256_loadu_si256
+#define _mm256_storeA_si256 _mm256_storeu_si256
+#else
+#define _mm256_loadA_si256  _mm256_load_si256
+#define _mm256_storeA_si256 _mm256_store_si256
+#endif
+#endif
+
+#if defined(USE_AVX512)
+#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
+#define _mm512_loadA_si512  _mm512_loadu_si512
+#define _mm512_storeA_si512 _mm512_storeu_si512
+#else
+#define _mm512_loadA_si512  _mm512_load_si512
+#define _mm512_storeA_si512 _mm512_store_si512
+#endif
+#endif
+
 namespace Eval::NNUE {
 
   // Version of the evaluation file
@@ -56,6 +85,9 @@ namespace Eval::NNUE {
   #elif defined(USE_SSE2)
   constexpr std::size_t kSimdWidth = 16;
 
+  #elif defined(USE_MMX)
+  constexpr std::size_t kSimdWidth = 8;
+
   #elif defined(USE_NEON)
   constexpr std::size_t kSimdWidth = 16;
   #endif
@@ -72,6 +104,22 @@ namespace Eval::NNUE {
     return (n + base - 1) / base * base;
   }
 
+  // Read a signed or unsigned integer from a stream in little-endian order
+  template <typename IntType>
+  inline IntType read_le(std::istream& stream) {
+    // Read the relevant bytes from the stream in little-endian order
+    std::uint8_t u[sizeof(IntType)];
+    stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
+    // Use unsigned arithmetic to convert to machine order
+    typename std::make_unsigned<IntType>::type v = 0;
+    for (std::size_t i = 0; i < sizeof(IntType); ++i)
+      v = (v << 8) | u[sizeof(IntType) - i - 1];
+    // Copy the machine-ordered bytes into a potentially signed value
+    IntType w;
+    std::memcpy(&w, &v, sizeof(IntType));
+    return w;
+  }
+
 }  // namespace Eval::NNUE
 
 #endif // #ifndef NNUE_COMMON_H_INCLUDED