- // Reduces four 512-bit accumulators to a single 128-bit vector and adds `bias`.
- //
- // The four inputs are first merged by m512_hadd128x16_interleave() into one
- // 512-bit vector. That vector is then folded in half twice using packed
- // 32-bit additions (512 -> 256 -> 128 bits), and `bias` is added to the
- // 128-bit result with one more packed 32-bit addition.
- //
- // NOTE(review): presumably each 32-bit lane of the return value ends up as
- // the horizontal sum of one of sum0..sum3 plus the matching bias lane; that
- // depends on the lane layout produced by m512_hadd128x16_interleave(), which
- // is not visible here -- confirm against its definition.
- [[maybe_unused]] static __m128i m512_haddx4(
-     __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
-     __m128i bias) {
-   const __m512i merged = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
-
-   // Fold 512 -> 256 bits: lower 256-bit half + upper 256-bit half.
-   const __m256i folded256 = _mm256_add_epi32(
-       _mm512_castsi512_si256(merged),
-       _mm512_extracti64x4_epi64(merged, 1));
-
-   // Fold 256 -> 128 bits: lower 128-bit half + upper 128-bit half.
-   const __m128i folded128 = _mm_add_epi32(
-       _mm256_castsi256_si128(folded256),
-       _mm256_extracti128_si256(folded256, 1));
-
-   return _mm_add_epi32(folded128, bias);
- }
-