- // Calculate cumulative value without using difference calculation
- void RefreshAccumulator(const Position& pos) const {
- auto& accumulator = pos.state()->accumulator;
- IndexType i = 0;
- Features::IndexList active_indices[2];
- RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
- active_indices);
- for (Color perspective : { WHITE, BLACK }) {
- std::memcpy(accumulator.accumulation[perspective][i], biases_,
- kHalfDimensions * sizeof(BiasType));
- for (const auto index : active_indices[perspective]) {
- const IndexType offset = kHalfDimensions * index;
- #if defined(USE_AVX512)
- auto accumulation = reinterpret_cast<__m512i*>(
- &accumulator.accumulation[perspective][i][0]);
- auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
- constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
- for (IndexType j = 0; j < kNumChunks; ++j)
- _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
-
- #elif defined(USE_AVX2)
- auto accumulation = reinterpret_cast<__m256i*>(
- &accumulator.accumulation[perspective][i][0]);
- auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
- constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j)
- _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
-
- #elif defined(USE_SSE2)
- auto accumulation = reinterpret_cast<__m128i*>(
- &accumulator.accumulation[perspective][i][0]);
- auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
- constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j)
- accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
-
- #elif defined(USE_MMX)
- auto accumulation = reinterpret_cast<__m64*>(
- &accumulator.accumulation[perspective][i][0]);
- auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
- constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j) {
- accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
- }
-
- #elif defined(USE_NEON)
- auto accumulation = reinterpret_cast<int16x8_t*>(
- &accumulator.accumulation[perspective][i][0]);
- auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
- constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
- for (IndexType j = 0; j < kNumChunks; ++j)
- accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-
- #else
- for (IndexType j = 0; j < kHalfDimensions; ++j)
- accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+ void update_accumulator(const Position& pos, const Color perspective) const {
+
+ // The size must be enough to contain the largest possible update.
+ // That might depend on the feature set and generally relies on the
+ // feature set's update cost calculation to be correct and never
+ // allow updates with more added/removed features than MaxActiveDimensions.
+ using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
+
+ #ifdef VECTOR
+ // Gcc-10.2 unnecessarily spills AVX2 registers if this array
+ // is defined in the VECTOR code below, once in each branch
+ vec_t acc[NumRegs];