From 82bb21dc7a198609589ef0cc78d185f00f619a90 Mon Sep 17 00:00:00 2001 From: mstembera Date: Tue, 6 Sep 2022 15:02:35 -0700 Subject: [PATCH] Optimize AVX2 path in NNUE evaluation always selecting AffineTransform specialization for small inputs. A related patch was tested as Initially tested as a simplification STC https://tests.stockfishchess.org/tests/view/6317c3f437f41b13973d6dff LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 58072 W: 15619 L: 15425 D: 27028 Ptnml(0-2): 241, 6191, 15992, 6357, 255 Elo gain speedup test STC https://tests.stockfishchess.org/tests/view/63181c1b37f41b13973d79dc LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 184496 W: 49922 L: 49401 D: 85173 Ptnml(0-2): 851, 19397, 51208, 19964, 828 and this patch gained in testing speedup = +0.0071 P(speedup > 0) = 1.0000 on CPU: 16 x AMD Ryzen 9 3950X closes https://github.com/official-stockfish/Stockfish/pull/4158 No functional change --- src/nnue/layers/affine_transform.h | 16 +++++++++++----- src/{ => nnue/layers}/simd.h | 0 2 files changed, 11 insertions(+), 5 deletions(-) rename src/{ => nnue/layers}/simd.h (100%) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 9a992608..461a7b83 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -25,7 +25,7 @@ #include #include #include "../nnue_common.h" -#include "../../simd.h" +#include "simd.h" /* This file contains the definition for a fully connected layer (aka affine transform). @@ -151,9 +151,15 @@ namespace Stockfish::Eval::NNUE::Layers { template class AffineTransform; +#if defined (USE_AVX512) + constexpr IndexType LargeInputSize = 2 * 64; +#else + constexpr IndexType LargeInputSize = std::numeric_limits::max(); +#endif + // A specialization for large inputs. template - class AffineTransform(InDims, MaxSimdWidth) >= 2*64)>> { + class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> { public: // Input/output type using InputType = std::uint8_t; @@ -170,7 +176,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen."); #if defined (USE_AVX512) static constexpr const IndexType InputSimdWidth = 64; @@ -369,7 +375,7 @@ namespace Stockfish::Eval::NNUE::Layers { }; template - class AffineTransform(InDims, MaxSimdWidth) < 2*64)>> { + class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> { public: // Input/output type // Input/output type @@ -387,7 +393,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen."); #if defined (USE_SSSE3) static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; diff --git a/src/simd.h b/src/nnue/layers/simd.h similarity index 100% rename from src/simd.h rename to src/nnue/layers/simd.h -- 2.39.2