always selecting AffineTransform specialization for small inputs.
A related patch was initially tested as a simplification:
STC https://tests.stockfishchess.org/tests/view/6317c3f437f41b13973d6dff
LLR: 2.95 (-2.94,2.94) <-1.75,0.25>
Total: 58072 W: 15619 L: 15425 D: 27028
Ptnml(0-2): 241, 6191, 15992, 6357, 255
Elo gain speedup test
STC https://tests.stockfishchess.org/tests/view/63181c1b37f41b13973d79dc
LLR: 2.94 (-2.94,2.94) <0.00,2.00>
Total: 184496 W: 49922 L: 49401 D: 85173
Ptnml(0-2): 851, 19397, 51208, 19964, 828
and this patch gained in testing
speedup = +0.0071
P(speedup > 0) = 1.0000
on CPU: 16 x AMD Ryzen 9 3950X
closes https://github.com/official-stockfish/Stockfish/pull/4158
No functional change
#include <algorithm>
#include <limits>
#include <type_traits>

#include "../nnue_common.h"
/*
  This file contains the definition for a fully connected layer (aka affine transform).
*/
// Forward declaration of the fully connected layer. The third (SFINAE)
// parameter selects between the large-input and small-input
// specializations below; it must default to void exactly once —
// repeating a default template argument in the same scope is ill-formed,
// so this declaration appears only here.
template <IndexType InDims, IndexType OutDims, typename Enabled = void>
class AffineTransform;
+#if defined (USE_AVX512)
+ constexpr IndexType LargeInputSize = 2 * 64;
+#else
+ constexpr IndexType LargeInputSize = std::numeric_limits<IndexType>::max();
+#endif
+
// A specialization for large inputs.
// (Duplicated comment/template-header lines from a garbled merge removed.)
template <IndexType InDims, IndexType OutDims>
- class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
+ class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= LargeInputSize)>> {
public:
// Input/output type
using InputType = std::uint8_t;
public:
// Input/output type
using InputType = std::uint8_t;
using OutputBuffer = OutputType[PaddedOutputDimensions];
using OutputBuffer = OutputType[PaddedOutputDimensions];
- static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");
+ static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
// AVX512 processes 64 input bytes per SIMD register.
// (Duplicated lines removed; the matching #endif lies outside this chunk.)
#if defined (USE_AVX512)
  static constexpr const IndexType InputSimdWidth = 64;
};

// (Duplicated close-brace/template-header pair from a garbled merge removed.)
template <IndexType InDims, IndexType OutDims>
- class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
+ class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < LargeInputSize)>> {
public:
// Input/output type
// Input/output type
public:
// Input/output type
// Input/output type
using OutputBuffer = OutputType[PaddedOutputDimensions];
using OutputBuffer = OutputType[PaddedOutputDimensions];
- static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");
+ static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen.");
// SSSE3 path: four 32-bit accumulators per SIMD register.
// (Duplicated lines removed; the matching #endif lies outside this chunk.)
#if defined (USE_SSSE3)
  static constexpr const IndexType OutputSimdWidth = SimdWidth / 4;