X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Fsimd.h;fp=src%2Fnnue%2Flayers%2Fsimd.h;h=f478cd7819f9aee7d9e9d6c77d2cf2af879ae841;hp=638e39941a856500bb09151ce64c8133bb04d36a;hb=8192945870967fb9c8801247d0b040b2bc657443;hpb=0d2ddb81ef44211e7bf40369dc8fc52160d0ee18 diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h index 638e3994..f478cd78 100644 --- a/src/nnue/layers/simd.h +++ b/src/nnue/layers/simd.h @@ -79,23 +79,6 @@ namespace Stockfish::Simd { return _mm512_add_epi32(sum0123a, sum0123b); } - [[maybe_unused]] static __m128i m512_haddx4( - __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, - __m128i bias) { - - __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); - - __m256i sum256lo = _mm512_castsi512_si256(sum); - __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); - - sum256lo = _mm256_add_epi32(sum256lo, sum256hi); - - __m128i sum128lo = _mm256_castsi256_si128(sum256lo); - __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - [[maybe_unused]] static void m512_add_dpbusd_epi32( __m512i& acc, __m512i a, @@ -138,21 +121,6 @@ namespace Stockfish::Simd { return _mm_cvtsi128_si32(sum128) + bias; } - [[maybe_unused]] static __m128i m256_haddx4( - __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, - __m128i bias) { - - sum0 = _mm256_hadd_epi32(sum0, sum1); - sum2 = _mm256_hadd_epi32(sum2, sum3); - - sum0 = _mm256_hadd_epi32(sum0, sum2); - - __m128i sum128lo = _mm256_castsi256_si128(sum0); - __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - [[maybe_unused]] static void m256_add_dpbusd_epi32( __m256i& acc, __m256i a, @@ -194,16 +162,6 @@ namespace Stockfish::Simd { return _mm_cvtsi128_si32(sum) + bias; } - [[maybe_unused]] static __m128i m128_haddx4( - __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, - __m128i bias) { - - sum0 = _mm_hadd_epi32(sum0, sum1); - sum2 = _mm_hadd_epi32(sum2, sum3); - sum0 = _mm_hadd_epi32(sum0, sum2); - return _mm_add_epi32(sum0, bias); - } - [[maybe_unused]] static void m128_add_dpbusd_epi32( __m128i& acc, __m128i a, @@ -261,19 +219,6 @@ namespace Stockfish::Simd { return neon_m128_reduce_add_epi32(sum) + bias; } - [[maybe_unused]] static int32x4_t neon_m128_haddx4( - int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3, - int32x4_t bias) { - - int32x4_t hsums { - neon_m128_reduce_add_epi32(sum0), - neon_m128_reduce_add_epi32(sum1), - neon_m128_reduce_add_epi32(sum2), - neon_m128_reduce_add_epi32(sum3) - }; - return vaddq_s32(hsums, bias); - } - [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( int32x4_t& acc, int8x8_t a0, int8x8_t b0,