X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Fsimd.h;h=349217edb7a607e38dff89eaafc640d04e0674b9;hb=8a912951de6d4bff78d3ff5258213a0c7e6f494e;hp=638e39941a856500bb09151ce64c8133bb04d36a;hpb=a6d9a302b867a76c3df5b658de6206e77b649a4d;p=stockfish diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h index 638e3994..349217ed 100644 --- a/src/nnue/layers/simd.h +++ b/src/nnue/layers/simd.h @@ -31,9 +31,6 @@ #elif defined(USE_SSE2) # include -#elif defined(USE_MMX) -# include - #elif defined(USE_NEON) # include #endif @@ -79,23 +76,6 @@ namespace Stockfish::Simd { return _mm512_add_epi32(sum0123a, sum0123b); } - [[maybe_unused]] static __m128i m512_haddx4( - __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, - __m128i bias) { - - __m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3); - - __m256i sum256lo = _mm512_castsi512_si256(sum); - __m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1); - - sum256lo = _mm256_add_epi32(sum256lo, sum256hi); - - __m128i sum128lo = _mm256_castsi256_si128(sum256lo); - __m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - [[maybe_unused]] static void m512_add_dpbusd_epi32( __m512i& acc, __m512i a, @@ -138,21 +118,6 @@ namespace Stockfish::Simd { return _mm_cvtsi128_si32(sum128) + bias; } - [[maybe_unused]] static __m128i m256_haddx4( - __m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, - __m128i bias) { - - sum0 = _mm256_hadd_epi32(sum0, sum1); - sum2 = _mm256_hadd_epi32(sum2, sum3); - - sum0 = _mm256_hadd_epi32(sum0, sum2); - - __m128i sum128lo = _mm256_castsi256_si128(sum0); - __m128i sum128hi = _mm256_extracti128_si256(sum0, 1); - - return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias); - } - [[maybe_unused]] static void m256_add_dpbusd_epi32( __m256i& acc, __m256i a, @@ -194,16 +159,6 @@ namespace Stockfish::Simd { return _mm_cvtsi128_si32(sum) + bias; } - [[maybe_unused]] static __m128i m128_haddx4( - __m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, - __m128i bias) { - - sum0 = _mm_hadd_epi32(sum0, sum1); - sum2 = _mm_hadd_epi32(sum2, sum3); - sum0 = _mm_hadd_epi32(sum0, sum2); - return _mm_add_epi32(sum0, bias); - } - [[maybe_unused]] static void m128_add_dpbusd_epi32( __m128i& acc, __m128i a, @@ -261,19 +216,6 @@ namespace Stockfish::Simd { return neon_m128_reduce_add_epi32(sum) + bias; } - [[maybe_unused]] static int32x4_t neon_m128_haddx4( - int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3, - int32x4_t bias) { - - int32x4_t hsums { - neon_m128_reduce_add_epi32(sum0), - neon_m128_reduce_add_epi32(sum1), - neon_m128_reduce_add_epi32(sum2), - neon_m128_reduce_add_epi32(sum3) - }; - return vaddq_s32(hsums, bias); - } - [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( int32x4_t& acc, int8x8_t a0, int8x8_t b0,