#elif USE_MMX
// MMX backend. Accumulator vectors are 64-bit __m64 values. This patch moves
// the PSQT path from a scalar std::int32_t to __m64 as well, so one
// psqt_vec_t now carries two packed 32-bit values instead of one.
typedef __m64 vec_t;
- typedef std::int32_t psqt_vec_t;
+ typedef __m64 psqt_vec_t;
// Load/store are plain dereference/assignment through a pointer to the vector type.
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
// Lane-wise add/subtract on packed 16-bit integers.
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_load_psqt(a) (*(a))
#define vec_store_psqt(a,b) *(a)=(b)
// PSQT arithmetic. The removed macros expanded to the bare scalar expressions
// a+b, a-b and 0 (unparenthesized); the added macros use the packed 32-bit
// MMX intrinsics and an all-zero __m64 instead.
- #define vec_add_psqt_32(a,b) a+b
- #define vec_sub_psqt_32(a,b) a-b
- #define vec_zero_psqt() 0
+ #define vec_add_psqt_32(a,b) _mm_add_pi32(a,b)
+ #define vec_sub_psqt_32(a,b) _mm_sub_pi32(a,b)
+ #define vec_zero_psqt() _mm_setzero_si64()
static constexpr IndexType NumRegs = 8;
// NumPsqtRegs is halved together with the type change: with
// PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4, eight 4-byte scalars
// and four 8-byte __m64 values both yield a tile height of 8.
- static constexpr IndexType NumPsqtRegs = 8;
+ static constexpr IndexType NumPsqtRegs = 4;
#elif USE_NEON
typedef int16x8_t vec_t;
// Number of output dimensions for one side
static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
// The patch removes LazyThreshold; its only visible use is the early return
// in transform() (abs(psqt) > LazyThreshold * OutputScale), which the patch
// removes as well.
- static constexpr int LazyThreshold = 1400;
-
#ifdef VECTOR
// Tile heights are element counts derived from register byte sizes:
// NumRegs registers of vec_t divided by 2 bytes per element, and
// NumPsqtRegs registers of psqt_vec_t divided by 4 bytes per element.
// NOTE(review): presumably 16-bit accumulator entries and 32-bit PSQT entries,
// matching the vec_add_16 / vec_add_psqt_32 macro names; confirm against the
// accumulator declarations, which are not visible here.
static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2;
static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
}
// Convert input features
// NOTE(review): this is a patch excerpt; parts of the body between the
// visible hunks are elided, so only the lines shown are described.
// The patch narrows the return type from std::pair<std::int32_t, bool> to a
// plain std::int32_t: the bool was true only on the removed early-return
// path below and false on the normal path, so after the patch the function
// always runs to the end and returns psqt alone.
- std::pair<std::int32_t, bool> transform(const Position& pos, OutputType* output, int bucket) const {
+ std::int32_t transform(const Position& pos, OutputType* output, int bucket) const {
// Bring the accumulators for both colors up to date before they are read.
update_accumulator(pos, WHITE);
update_accumulator(pos, BLACK);
- psqtAccumulation[static_cast<int>(perspectives[1])][bucket]
) / 2;
// Removed lazy shortcut: when |psqt| exceeded LazyThreshold * OutputScale the
// old code returned { psqt, true } immediately, skipping the code that follows.
- if (abs(psqt) > LazyThreshold * OutputScale)
- return { psqt, true };
-
#if defined(USE_AVX512)
constexpr IndexType NumChunks = HalfDimensions / (SimdWidth * 2);
static_assert(HalfDimensions % (SimdWidth * 2) == 0);
// _mm_empty() issues EMMS to reset the MMX state after MMX intrinsics.
// NOTE(review): presumably guarded by an MMX-specific #if that is not visible
// in this excerpt; confirm.
_mm_empty();
#endif
- return { psqt, false };
+ return psqt;
}
private: