- // Update incrementally in two steps. First, we update the "next"
- // accumulator. Then, we update the current accumulator (pos.state()).
-
- // Gather all features to be updated. This code assumes HalfKP features
- // only and doesn't support refresh triggers.
- static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::Friend>>,
- RawFeatures>);
- Features::IndexList removed[2], added[2];
- Features::HalfKP<Features::Side::Friend>::append_changed_indices(pos,
- next->dirtyPiece, c, &removed[0], &added[0]);
- for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
- Features::HalfKP<Features::Side::Friend>::append_changed_indices(pos,
- st2->dirtyPiece, c, &removed[1], &added[1]);
-
- // Mark the accumulators as computed.
- next->accumulator.state[c] = COMPUTED;
- pos.state()->accumulator.state[c] = COMPUTED;
-
- // Now update the accumulators listed in info[], where the last element is a sentinel.
- StateInfo *info[3] =
- { next, next == pos.state() ? nullptr : pos.state(), nullptr };
- #ifdef VECTOR
- for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
- {
- // Load accumulator
- auto accTile = reinterpret_cast<vec_t*>(
- &st->accumulator.accumulation[c][0][j * TileHeight]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = vec_load(&accTile[k]);
+ auto accIn = reinterpret_cast<const vec_t*>(
+ &st->accumulator.accumulation[Perspective][0]);
+ auto accOut = reinterpret_cast<vec_t*>(
+ &states_to_update[0]->accumulator.accumulation[Perspective][0]);
+
+ const IndexType offsetR0 = HalfDimensions * removed[0][0];
+ auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
+ const IndexType offsetA = HalfDimensions * added[0][0];
+ auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
+
+ if (removed[0].size() == 1)
+ {
+ for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k)
+ accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
+ }
+ else
+ {
+ const IndexType offsetR1 = HalfDimensions * removed[0][1];
+ auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
+
+ for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k)
+ accOut[k] = vec_sub_16(
+ vec_add_16(accIn[k], columnA[k]),
+ vec_add_16(columnR0[k], columnR1[k]));
+ }
+
+ auto accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
+ &st->accumulator.psqtAccumulation[Perspective][0]);
+ auto accPsqtOut = reinterpret_cast<psqt_vec_t*>(
+ &states_to_update[0]->accumulator.psqtAccumulation[Perspective][0]);
+
+ const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0];
+ auto columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
+ const IndexType offsetPsqtA = PSQTBuckets * added[0][0];
+ auto columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]);
+
+ if (removed[0].size() == 1)
+ {
+ for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k)
+ accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(
+ accPsqtIn[k], columnPsqtR0[k]), columnPsqtA[k]);
+ }
+ else
+ {
+ const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1];
+ auto columnPsqtR1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);