#define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b)
#define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b)
#define vec_zero_psqt() _mm256_setzero_si256()
- #define NumRegistersSIMD 32
+ #define NumRegistersSIMD 16
#define MaxChunkSize 64
#elif USE_AVX2
#define NumRegistersSIMD (Is64Bit ? 16 : 8)
#define MaxChunkSize 16
- #elif USE_MMX
- using vec_t = __m64;
- using psqt_vec_t = __m64;
- #define vec_load(a) (*(a))
- #define vec_store(a,b) *(a)=(b)
- #define vec_add_16(a,b) _mm_add_pi16(a,b)
- #define vec_sub_16(a,b) _mm_sub_pi16(a,b)
- #define vec_mul_16(a,b) _mm_mullo_pi16(a,b)
- #define vec_zero() _mm_setzero_si64()
- #define vec_set_16(a) _mm_set1_pi16(a)
- inline vec_t vec_max_16(vec_t a,vec_t b){
- vec_t comparison = _mm_cmpgt_pi16(a,b);
- return _mm_or_si64(_mm_and_si64(comparison, a), _mm_andnot_si64(comparison, b));
- }
- inline vec_t vec_min_16(vec_t a,vec_t b){
- vec_t comparison = _mm_cmpgt_pi16(a,b);
- return _mm_or_si64(_mm_and_si64(comparison, b), _mm_andnot_si64(comparison, a));
- }
- #define vec_msb_pack_16(a,b) _mm_packs_pi16(_mm_srli_pi16(a,7),_mm_srli_pi16(b,7))
- #define vec_load_psqt(a) (*(a))
- #define vec_store_psqt(a,b) *(a)=(b)
- #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
- #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
- #define vec_zero_psqt() _mm_setzero_si64()
- #define vec_cleanup() _mm_empty()
- #define NumRegistersSIMD 8
- #define MaxChunkSize 8
-
#elif USE_NEON
using vec_t = int16x8_t;
using psqt_vec_t = int32x4_t;
for (IndexType j = 0; j < HalfDimensions / 2; ++j) {
BiasType sum0 = accumulation[static_cast<int>(perspectives[p])][j + 0];
BiasType sum1 = accumulation[static_cast<int>(perspectives[p])][j + HalfDimensions / 2];
- sum0 = std::max<int>(0, std::min<int>(127, sum0));
- sum1 = std::max<int>(0, std::min<int>(127, sum1));
- output[offset + j] = static_cast<OutputType>(sum0 * sum1 / 128);
+ sum0 = std::clamp<BiasType>(sum0, 0, 127);
+ sum1 = std::clamp<BiasType>(sum1, 0, 127);
+ output[offset + j] = static_cast<OutputType>(unsigned(sum0 * sum1) / 128);
}
#endif
}
-#if defined(vec_cleanup)
- vec_cleanup();
-#endif
-
return psqt;
} // end of function transform()
}
}
#endif
-
- #if defined(USE_MMX)
- _mm_empty();
- #endif
}
template<Color Perspective>
accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
}
#endif
-
- #if defined(USE_MMX)
- _mm_empty();
- #endif
}
template<Color Perspective>