From: mstembera Date: Tue, 6 Sep 2022 22:02:35 +0000 (-0700) Subject: Optimize AVX2 path in NNUE evaluation X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=82bb21dc7a198609589ef0cc78d185f00f619a90 Optimize AVX2 path in NNUE evaluation always selecting AffineTransform specialization for small inputs. A related patch was tested as Initially tested as a simplification STC https://tests.stockfishchess.org/tests/view/6317c3f437f41b13973d6dff LLR: 2.95 (-2.94,2.94) <-1.75,0.25> Total: 58072 W: 15619 L: 15425 D: 27028 Ptnml(0-2): 241, 6191, 15992, 6357, 255 Elo gain speedup test STC https://tests.stockfishchess.org/tests/view/63181c1b37f41b13973d79dc LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 184496 W: 49922 L: 49401 D: 85173 Ptnml(0-2): 851, 19397, 51208, 19964, 828 and this patch gained in testing speedup = +0.0071 P(speedup > 0) = 1.0000 on CPU: 16 x AMD Ryzen 9 3950X closes https://github.com/official-stockfish/Stockfish/pull/4158 No functional change --- diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 9a992608..461a7b83 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -25,7 +25,7 @@ #include #include #include "../nnue_common.h" -#include "../../simd.h" +#include "simd.h" /* This file contains the definition for a fully connected layer (aka affine transform). @@ -151,9 +151,15 @@ namespace Stockfish::Eval::NNUE::Layers { template class AffineTransform; +#if defined (USE_AVX512) + constexpr IndexType LargeInputSize = 2 * 64; +#else + constexpr IndexType LargeInputSize = std::numeric_limits::max(); +#endif + // A specialization for large inputs. template - class AffineTransform(InDims, MaxSimdWidth) >= 2*64)>> { + class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> { public: // Input/output type using InputType = std::uint8_t; @@ -170,7 +176,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen."); #if defined (USE_AVX512) static constexpr const IndexType InputSimdWidth = 64; @@ -369,7 +375,7 @@ namespace Stockfish::Eval::NNUE::Layers { }; template - class AffineTransform(InDims, MaxSimdWidth) < 2*64)>> { + class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> { public: // Input/output type // Input/output type @@ -387,7 +393,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen."); #if defined (USE_SSSE3) static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; diff --git a/src/simd.h b/src/nnue/layers/simd.h similarity index 100% rename from src/simd.h rename to src/nnue/layers/simd.h