X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Flayers%2Faffine_transform_sparse_input.h;fp=src%2Fnnue%2Flayers%2Faffine_transform_sparse_input.h;h=a5bea08e74b3fbaea0431a5daf2329d83288e605;hp=3c7defcc42cbc25425d30e98708d13eaa54a8ac2;hb=529d3be8e245c06b62a45525cb9325ed6e50b636;hpb=f972947492c513b7c0c4046058354b44a9ae9f04 diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h index 3c7defcc..a5bea08e 100644 --- a/src/nnue/layers/affine_transform_sparse_input.h +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -34,43 +34,15 @@ */ namespace Stockfish::Eval::NNUE::Layers { -#if defined(__GNUC__) // GCC, Clang, ICC - - static inline IndexType lsb_(std::uint32_t b) { - assert(b); - return IndexType(__builtin_ctzl(b)); - } - -#elif defined(_MSC_VER) // MSVC - - static inline IndexType lsb_(std::uint32_t b) { - assert(b); - unsigned long idx; - _BitScanForward(&idx, b); - return (IndexType) idx; - } - -#else // Compiler is neither GCC nor MSVC compatible - -#error "Compiler not supported." - -#endif - #if defined(USE_SSSE3) alignas(CacheLineSize) static inline const std::array, 256> lookup_indices = [](){ std::array, 256> v{}; - for (int i = 0; i < 256; ++i) + for (unsigned i = 0; i < 256; ++i) { - int j = i; - int k = 0; + std::uint64_t j = i, k = 0; while(j) - { - const IndexType lsbIndex = lsb_(std::uint32_t(j)); - j &= j - 1; - v[i][k] = lsbIndex; - ++k; - } + v[i][k++] = pop_lsb(j); } return v; }(); @@ -83,7 +55,11 @@ namespace Stockfish::Eval::NNUE::Layers { #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #elif defined (USE_AVX2) using vec_t = __m256i; - #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) + #if defined(USE_VNNI) && !defined(USE_AVXVNNI) + #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) + #else + #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) + #endif #elif defined (USE_SSSE3) using vec_t = __m128i; #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) @@ -97,8 +73,8 @@ namespace Stockfish::Eval::NNUE::Layers { const auto inputVector = reinterpret_cast(input); IndexType count = 0; - __m128i base = _mm_set1_epi16(0); - __m128i increment = _mm_set1_epi16(8); + __m128i base = _mm_setzero_si128(); + const __m128i increment = _mm_set1_epi16(8); for (IndexType i = 0; i < NumChunks; ++i) { // bitmask of nonzero values in this chunk