X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Fsimd.h;fp=src%2Fnnue%2Flayers%2Fsimd.h;h=638e39941a856500bb09151ce64c8133bb04d36a;hp=fae31a62955db30d3374003829bd64dd3c6231da;hb=a6d9a302b867a76c3df5b658de6206e77b649a4d;hpb=4c43e1e27ce990735fb0226e35248fc82ea6a519

diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h
index fae31a62..638e3994 100644
--- a/src/nnue/layers/simd.h
+++ b/src/nnue/layers/simd.h
@@ -239,6 +239,12 @@ namespace Stockfish::Simd {
         acc = vdotq_s32(acc, a1, b1);
     }
 
+    [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32(
+        int32x4_t& acc,
+        int8x16_t a, int8x16_t b) {
+
+        acc = vdotq_s32(acc, a, b);
+    }
 #endif
 
 #if defined (USE_NEON)
@@ -277,9 +283,19 @@ namespace Stockfish::Simd {
         product = vmlal_s8(product, a1, b1);
         acc = vpadalq_s16(acc, product);
     }
-
 #endif
 
+#if USE_NEON >= 8
+    [[maybe_unused]] static void neon_m128_add_dpbusd_epi32(
+        int32x4_t& acc,
+        int8x16_t a, int8x16_t b) {
+
+        int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b));
+        int16x8_t product1 = vmull_high_s8(a, b);
+        int16x8_t sum = vpaddq_s16(product0, product1);
+        acc = vpadalq_s16(acc, sum);
+    }
+#endif
 }
 
 #endif // STOCKFISH_SIMD_H_INCLUDED