X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Fnnue_feature_transformer.h;h=741d97cf1190c539bdcf2766ae6852bdefc52a12;hb=f193778446acc6e60d7f0f99c6eb01489f89e962;hp=2c0a0c6d3134b61270a9a16a3a1ae199176d9d07;hpb=e8d64af1230fdac65bb0da246df3e7abe82e0838;p=stockfish diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2c0a0c6d..741d97cf 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -84,18 +84,18 @@ namespace Stockfish::Eval::NNUE { #elif USE_MMX typedef __m64 vec_t; - typedef std::int32_t psqt_vec_t; + typedef __m64 psqt_vec_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) #define vec_sub_16(a,b) _mm_sub_pi16(a,b) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) a+b - #define vec_sub_psqt_32(a,b) a-b - #define vec_zero_psqt() 0 + #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b) + #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b) + #define vec_zero_psqt() _mm_setzero_si64() static constexpr IndexType NumRegs = 8; - static constexpr IndexType NumPsqtRegs = 8; + static constexpr IndexType NumPsqtRegs = 4; #elif USE_NEON typedef int16x8_t vec_t; @@ -124,8 +124,6 @@ namespace Stockfish::Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; - static constexpr int LazyThreshold = 1400; - #ifdef VECTOR static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; @@ -171,7 +169,7 @@ namespace Stockfish::Eval::NNUE { } // Convert input features - std::pair<std::int32_t, bool> transform(const Position& pos, OutputType* output, int bucket) const { + std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { update_accumulator(pos, WHITE); update_accumulator(pos, BLACK); @@ -184,9 +182,6 @@ namespace 
Stockfish::Eval::NNUE { - psqtAccumulation[static_cast<int>(perspectives[1])][bucket] ) / 2; - if (abs(psqt) > LazyThreshold * OutputScale) - return { psqt, true }; - #if defined(USE_AVX512) constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2); static_assert(HalfDimensions % (SimdWidth * 2) == 0); @@ -293,7 +288,7 @@ namespace Stockfish::Eval::NNUE { _mm_empty(); #endif - return { psqt, false }; + return psqt; } private: