X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Fnnue_common.h;h=74eaae17c2dadecb804f169b204984fe42cc3adb;hp=7bc905dc751c66aaefa8f748cf90df9160a3b344;hb=4b86ef8c4f8755850b38f2eca026cb9da20c4d01;hpb=9b4967071e2fb116673820127522bc43d01d2257

diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
index 7bc905dc..74eaae17 100644
--- a/src/nnue/nnue_common.h
+++ b/src/nnue/nnue_common.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -24,6 +24,8 @@
 #include <cstring>
 #include <iostream>
 
+#include "../misc.h"  // for IsLittleEndian
+
 #if defined(USE_AVX2)
 #include <immintrin.h>
 
@@ -43,77 +45,33 @@
 #include <arm_neon.h>
 #endif
 
-// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
-//       compiled with older g++ crashes because the output memory is not aligned
-//       even though alignas is specified.
-#if defined(USE_AVX2)
-#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
-#define _mm256_loadA_si256  _mm256_loadu_si256
-#define _mm256_storeA_si256 _mm256_storeu_si256
-#else
-#define _mm256_loadA_si256  _mm256_load_si256
-#define _mm256_storeA_si256 _mm256_store_si256
-#endif
-#endif
-
-#if defined(USE_AVX512)
-#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
-#define _mm512_loadA_si512   _mm512_loadu_si512
-#define _mm512_storeA_si512  _mm512_storeu_si512
-#else
-#define _mm512_loadA_si512   _mm512_load_si512
-#define _mm512_storeA_si512  _mm512_store_si512
-#endif
-#endif
-
-namespace Eval::NNUE {
+namespace Stockfish::Eval::NNUE {
 
   // Version of the evaluation file
-  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+  constexpr std::uint32_t Version = 0x7AF32F20u;
 
   // Constant used in evaluation value calculation
-  constexpr int FV_SCALE = 16;
-  constexpr int kWeightScaleBits = 6;
+  constexpr int OutputScale = 16;
+  constexpr int WeightScaleBits = 6;
 
   // Size of cache line (in bytes)
-  constexpr std::size_t kCacheLineSize = 64;
+  constexpr std::size_t CacheLineSize = 64;
 
   // SIMD width (in bytes)
   #if defined(USE_AVX2)
-  constexpr std::size_t kSimdWidth = 32;
+  constexpr std::size_t SimdWidth = 32;
 
   #elif defined(USE_SSE2)
-  constexpr std::size_t kSimdWidth = 16;
+  constexpr std::size_t SimdWidth = 16;
 
   #elif defined(USE_MMX)
-  constexpr std::size_t kSimdWidth = 8;
+  constexpr std::size_t SimdWidth = 8;
 
   #elif defined(USE_NEON)
-  constexpr std::size_t kSimdWidth = 16;
+  constexpr std::size_t SimdWidth = 16;
   #endif
 
-  constexpr std::size_t kMaxSimdWidth = 32;
-
-  // unique number for each piece type on each square
-  enum {
-    PS_NONE     =  0,
-    PS_W_PAWN   =  1,
-    PS_B_PAWN   =  1 * SQUARE_NB + 1,
-    PS_W_KNIGHT =  2 * SQUARE_NB + 1,
-    PS_B_KNIGHT =  3 * SQUARE_NB + 1,
-    PS_W_BISHOP =  4 * SQUARE_NB + 1,
-    PS_B_BISHOP =  5 * SQUARE_NB + 1,
-    PS_W_ROOK   =  6 * SQUARE_NB + 1,
-    PS_B_ROOK   =  7 * SQUARE_NB + 1,
-    PS_W_QUEEN  =  8 * SQUARE_NB + 1,
-    PS_B_QUEEN  =  9 * SQUARE_NB + 1,
-    PS_W_KING   = 10 * SQUARE_NB + 1,
-    PS_END      = PS_W_KING, // pieces without kings (pawns included)
-    PS_B_KING   = 11 * SQUARE_NB + 1,
-    PS_END2     = 12 * SQUARE_NB + 1
-  };
-
-  extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
+  constexpr std::size_t MaxSimdWidth = 32;
 
   // Type of input feature after conversion
   using TransformedFeatureType = std::uint8_t;
@@ -121,7 +79,7 @@ namespace Eval::NNUE {
 
   // Round n up to be a multiple of base
   template <typename IntType>
-  constexpr IntType CeilToMultiple(IntType n, IntType base) {
+  constexpr IntType ceil_to_multiple(IntType n, IntType base) {
       return (n + base - 1) / base * base;
   }
 
@@ -130,19 +88,77 @@ namespace Eval::NNUE {
   // necessary to return a result with the byte ordering of the compiling machine.
   template <typename IntType>
   inline IntType read_little_endian(std::istream& stream) {
-
       IntType result;
-      std::uint8_t u[sizeof(IntType)];
-      typename std::make_unsigned<IntType>::type v = 0;
 
-      stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
-      for (std::size_t i = 0; i < sizeof(IntType); ++i)
-          v = (v << 8) | u[sizeof(IntType) - i - 1];
+      if (IsLittleEndian)
+          stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
+      else
+      {
+          std::uint8_t u[sizeof(IntType)];
+          typename std::make_unsigned<IntType>::type v = 0;
+
+          stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
+          for (std::size_t i = 0; i < sizeof(IntType); ++i)
+              v = (v << 8) | u[sizeof(IntType) - i - 1];
+
+          std::memcpy(&result, &v, sizeof(IntType));
+      }
 
-      std::memcpy(&result, &v, sizeof(IntType));
       return result;
   }
 
-}  // namespace Eval::NNUE
+  // write_little_endian() is our utility to write an integer (signed or unsigned, any size)
+  // to a stream in little-endian order. We swap the byte order before the write if
+  // necessary to always write in little endian order, independently of the byte
+  // ordering of the compiling machine.
+  template <typename IntType>
+  inline void write_little_endian(std::ostream& stream, IntType value) {
+
+      if (IsLittleEndian)
+          stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
+      else
+      {
+          std::uint8_t u[sizeof(IntType)];
+          typename std::make_unsigned<IntType>::type v = value;
+
+          std::size_t i = 0;
+          // if constexpr to silence the warning about shift by 8
+          if constexpr (sizeof(IntType) > 1)
+          {
+            for (; i + 1 < sizeof(IntType); ++i)
+            {
+                u[i] = v;
+                v >>= 8;
+            }
+          }
+          u[i] = v;
+
+          stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
+      }
+  }
+
+  // read_little_endian(s, out, N) : read integers in bulk from a little indian stream.
+  // This reads N integers from stream s and put them in array out.
+  template <typename IntType>
+  inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
+      if (IsLittleEndian)
+          stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
+      else
+          for (std::size_t i = 0; i < count; ++i)
+              out[i] = read_little_endian<IntType>(stream);
+  }
+
+  // write_little_endian(s, values, N) : write integers in bulk to a little indian stream.
+  // This takes N integers from array values and writes them on stream s.
+  template <typename IntType>
+  inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
+      if (IsLittleEndian)
+          stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
+      else
+          for (std::size_t i = 0; i < count; ++i)
+              write_little_endian<IntType>(stream, values[i]);
+  }
+
+}  // namespace Stockfish::Eval::NNUE
 
 #endif // #ifndef NNUE_COMMON_H_INCLUDED