// NEON helper routines from the src/simd.h patch (hunk context:
// namespace Stockfish::Simd). Only this hunk is visible here; the patch
// context shows a closing brace and the STOCKFISH_SIMD_H_INCLUDED
// include-guard #endif following this section in the real header.
#ifdef USE_NEON

// Returns the sum of the four signed 32-bit lanes of `s`.
[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
#   if USE_NEON >= 8
    // One across-vector add when USE_NEON is at least 8
    // (presumably the AArch64 build -- confirm against the build flags).
    return vaddvq_s32(s);
#   else
    // Fallback: accumulate the lanes one at a time, in lane order.
    int total = s[0];
    total += s[1];
    total += s[2];
    total += s[3];
    return total;
#   endif
}

// Returns the horizontal sum of the lanes of `sum`, plus the scalar `bias`.
[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
    const int lane_total = neon_m128_reduce_add_epi32(sum);
    return lane_total + bias;
}

// Reduces four vectors at once: lane i of the result is the horizontal sum
// of `sum<i>` plus lane i of `bias`.
[[maybe_unused]] static int32x4_t neon_m128_haddx4(
    int32x4_t sum0, int32x4_t sum1, int32x4_t sum2, int32x4_t sum3,
    int32x4_t bias) {

    // Pack the four scalar totals into one vector, in argument order.
    int32x4_t lane_totals {
        neon_m128_reduce_add_epi32(sum0),
        neon_m128_reduce_add_epi32(sum1),
        neon_m128_reduce_add_epi32(sum2),
        neon_m128_reduce_add_epi32(sum3)
    };

    return vaddq_s32(lane_totals, bias);
}

// Accumulates two 8-lane signed byte products into the 32-bit lanes of `acc`.
//
// The lane-wise products a0*b0 and a1*b1 are widened to 16 bits and summed;
// adjacent pairs of those 16-bit sums are then added into the matching
// 32-bit lane of `acc`, which is updated in place.
//
// NOTE(review): the intermediate a0*b0 + a1*b1 is held in 16-bit lanes, so
// callers presumably keep operands in a range where it cannot wrap -- verify.
[[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2(
    int32x4_t& acc,
    int8x8_t a0, int8x8_t b0,
    int8x8_t a1, int8x8_t b1) {

    const int16x8_t first_products = vmull_s8(a0, b0);                    // widening multiply
    const int16x8_t summed_products = vmlal_s8(first_products, a1, b1);   // widening multiply-accumulate
    acc = vpadalq_s16(acc, summed_products);                              // pairwise add, accumulate as 32-bit
}

#endif