From 7922e07af83dd472da6e5b38fb84214cfe46a630 Mon Sep 17 00:00:00 2001 From: Andreas Matthies Date: Tue, 13 Jun 2023 06:24:04 +0200 Subject: [PATCH] Fix for MSVC compilation. MSVC needs two more explicit casts to compile new affine_transform_sparse_input. See https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps and https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps closes https://github.com/official-stockfish/Stockfish/pull/4616 No functional change --- AUTHORS | 1 + src/nnue/layers/affine_transform_sparse_input.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index 63b862ce..a89dc130 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,6 +19,7 @@ Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) +Andreas Matthies (Matthies) Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) Andrey Neporada (nepal) diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h index 00b17c19..e0c3a8a0 100644 --- a/src/nnue/layers/affine_transform_sparse_input.h +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -98,10 +98,10 @@ namespace Stockfish::Eval::NNUE::Layers { #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #elif defined (USE_AVX2) using vec_t = __m256i; - #define vec_nnz(a) _mm256_movemask_ps((__m256)_mm256_cmpgt_epi32(a, _mm256_setzero_si256())) + #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) #elif defined (USE_SSSE3) using vec_t = __m128i; - #define vec_nnz(a) _mm_movemask_ps((__m128)_mm_cmpgt_epi32(a, _mm_setzero_si128())) + #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) #endif constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) -- 2.39.2