X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Fclipped_relu.h;h=48cd6c6934547a06852a248b6ad936733f8a0faa;hb=8a912951de6d4bff78d3ff5258213a0c7e6f494e;hp=ffd2e3b76a94523eec1beb01d6b67bec122a89d2;hpb=cb9c2594fcedc881ae8f8bfbfdf130cf89840e4c;p=stockfish diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index ffd2e3b7..48cd6c69 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,6 +21,10 @@ #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#include +#include +#include + #include "../nnue_common.h" namespace Stockfish::Eval::NNUE::Layers { @@ -59,7 +63,7 @@ namespace Stockfish::Eval::NNUE::Layers { } // Forward propagation - const OutputType* propagate( + void propagate( const InputType* input, OutputType* output) const { #if defined(USE_AVX2) @@ -131,24 +135,6 @@ namespace Stockfish::Eval::NNUE::Layers { } constexpr IndexType Start = NumChunks * SimdWidth; - #elif defined(USE_MMX) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m64 k0x80s = _mm_set1_pi8(-128); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m64*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m64 words0 = _mm_srai_pi16( - _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), - WeightScaleBits); - const __m64 words1 = _mm_srai_pi16( - _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), - WeightScaleBits); - const __m64 packedbytes = _mm_packs_pi16(words0, words1); - out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); - } - _mm_empty(); - constexpr IndexType Start = NumChunks * SimdWidth; - #elif defined(USE_NEON) constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); const int8x8_t Zero = {0}; @@ -168,18 +154,8 @@ namespace Stockfish::Eval::NNUE::Layers { for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast( - std::max(0, std::min(127, input[i] >> WeightScaleBits))); - } - - // Affine transform layers expect that there is at least - // ceil_to_multiple(OutputDimensions, 32) initialized values. - // We cannot do this in the affine transform because it requires - // preallocating space here. - for (IndexType i = OutputDimensions; i < PaddedOutputDimensions; ++i) { - output[i] = 0; + std::clamp(input[i] >> WeightScaleBits, 0, 127)); } - - return output; } };