X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Faffine_transform_sparse_input.h;fp=src%2Fnnue%2Flayers%2Faffine_transform_sparse_input.h;h=e0c3a8a06bb5478c2f4e48d21b651a15699591a0;hp=00b17c19f129b868050492371a8752c54efdf5ce;hb=7922e07af83dd472da6e5b38fb84214cfe46a630;hpb=92c949e12e028cb4556de2786a77f2aec178d59f diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h index 00b17c19..e0c3a8a0 100644 --- a/src/nnue/layers/affine_transform_sparse_input.h +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -98,10 +98,10 @@ namespace Stockfish::Eval::NNUE::Layers { #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #elif defined (USE_AVX2) using vec_t = __m256i; - #define vec_nnz(a) _mm256_movemask_ps((__m256)_mm256_cmpgt_epi32(a, _mm256_setzero_si256())) + #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) #elif defined (USE_SSSE3) using vec_t = __m128i; - #define vec_nnz(a) _mm_movemask_ps((__m128)_mm_cmpgt_epi32(a, _mm_setzero_si128())) + #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) #endif constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8)