X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Fnnue_feature_transformer.h;h=1e0b0e6da55112dbcddf78bddeb31668564f5e66;hb=7dcfc50bde9ed7499d43b469e12ba7f456bf0d25;hp=c3f012e412ec2415b726a469d080378b8049526d;hpb=3f6451eff7c62e8d4a33c5b11f055a81b3da8387;p=stockfish

diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
index c3f012e4..1e0b0e6d 100644
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -1,6 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -27,7 +27,7 @@
 
 #include <cstring> // std::memset()
 
-namespace Eval::NNUE {
+namespace Stockfish::Eval::NNUE {
 
   // If vector instructions are enabled, we update and refresh the
   // accumulator tile by tile such that each tile fits in the CPU's
@@ -127,7 +127,13 @@ namespace Eval::NNUE {
 
       const auto& accumulation = pos.state()->accumulator.accumulation;
 
-  #if defined(USE_AVX2)
+  #if defined(USE_AVX512)
+      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
+      static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
+      const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
+      const __m512i kZero = _mm512_setzero_si512();
+
+  #elif defined(USE_AVX2)
       constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
       constexpr int kControl = 0b11011000;
       const __m256i kZero = _mm256_setzero_si256();
@@ -154,13 +160,24 @@ namespace Eval::NNUE {
       for (IndexType p = 0; p < 2; ++p) {
         const IndexType offset = kHalfDimensions * p;
 
-  #if defined(USE_AVX2)
+  #if defined(USE_AVX512)
+        auto out = reinterpret_cast<__m512i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m512i sum0 = _mm512_load_si512(
+              &reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m512i sum1 = _mm512_load_si512(
+              &reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+          _mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
+              _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
+        }
+
+  #elif defined(USE_AVX2)
         auto out = reinterpret_cast<__m256i*>(&output[offset]);
         for (IndexType j = 0; j < kNumChunks; ++j) {
           __m256i sum0 = _mm256_load_si256(
               &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
           __m256i sum1 = _mm256_load_si256(
-            &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+              &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
           _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
               _mm256_packs_epi16(sum0, sum1), kZero), kControl));
         }
@@ -177,9 +194,9 @@ namespace Eval::NNUE {
           _mm_store_si128(&out[j],
 
   #ifdef USE_SSE41
-            _mm_max_epi8(packedbytes, kZero)
+              _mm_max_epi8(packedbytes, kZero)
   #else
-            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+              _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
   #endif
 
           );
@@ -230,7 +247,7 @@ namespace Eval::NNUE {
       // Look for a usable accumulator of an earlier position. We keep track
       // of the estimated gain in terms of features to be added/subtracted.
       StateInfo *st = pos.state(), *next = nullptr;
-      int gain = popcount(pos.pieces()) - 2;
+      int gain = pos.count<ALL_PIECES>() - 2;
       while (st->accumulator.state[c] == EMPTY)
       {
         auto& dp = st->dirtyPiece;
@@ -395,6 +412,6 @@ namespace Eval::NNUE {
         WeightType weights_[kHalfDimensions * kInputDimensions];
   };
 
-}  // namespace Eval::NNUE
+}  // namespace Stockfish::Eval::NNUE
 
 #endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED