X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Faffine_transform.h;h=8d2acd1852eabfa9dce873b803040742a2da75f9;hp=985ee71a4193e571f9ecdddfc144ca4c2c571aea;hb=f46c73040c16a078b884825c203feee6b0a8850b;hpb=21df37d7fd4dcc9b4a9c319382cc43685c0259c8 diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 985ee71a..8d2acd18 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -126,8 +126,7 @@ namespace Eval::NNUE::Layers { const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); const auto row256 = reinterpret_cast(&row[kNumChunks]); __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); - product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1)); - sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256)); + sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256)); } output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];