Avoids an intrinsic that is missing in gcc < 10.
For this target, it might trigger another gcc bug on Windows that
requires an up-to-date gcc 8, 9, or 10, or the use of clang.
Fixes https://github.com/official-stockfish/Stockfish/issues/2975
closes https://github.com/official-stockfish/Stockfish/pull/2976
No functional change
ifeq ($(avx512),yes)
CXXFLAGS += -DUSE_AVX512
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
- CXXFLAGS += -mavx512bw
+ CXXFLAGS += -mavx512f -mavx512bw
endif
endif
const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
- product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1));
- sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256));
+ sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
}
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];