- auto& accumulator = pos.state()->accumulator;
- IndexType i = 0;
- Features::IndexList removed_indices[2], added_indices[2];
- bool reset[2] = { false, false };
- RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
- removed_indices, added_indices, reset);
-
- #ifdef TILING
- for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
- for (Color perspective : { WHITE, BLACK }) {
- auto accTile = reinterpret_cast<vec_t*>(
- &accumulator.accumulation[perspective][i][j * kTileHeight]);
- vec_t acc[kNumRegs];
-
- if (reset[perspective]) {
- auto biasesTile = reinterpret_cast<const vec_t*>(
- &biases_[j * kTileHeight]);
- for (unsigned k = 0; k < kNumRegs; ++k)
- acc[k] = biasesTile[k];
- } else {
- auto prevAccTile = reinterpret_cast<const vec_t*>(
- &prev_accumulator->accumulation[perspective][i][j * kTileHeight]);
- for (IndexType k = 0; k < kNumRegs; ++k)
- acc[k] = vec_load(&prevAccTile[k]);
-
- // Difference calculation for the deactivated features
- for (const auto index : removed_indices[perspective]) {
- const IndexType offset = kHalfDimensions * index + j * kTileHeight;
- auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-
- for (IndexType k = 0; k < kNumRegs; ++k)
- acc[k] = vec_sub_16(acc[k], column[k]);
+ const IndexType offsetR0 = HalfDimensions * removed[0][0];
+ auto columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
+ const IndexType offsetA = HalfDimensions * added[0][0];
+ auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
+
+ if (removed[0].size() == 1)
+ {
+ for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k)
+ accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
+ }
+ else
+ {
+ const IndexType offsetR1 = HalfDimensions * removed[0][1];
+ auto columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
+
+ for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k)
+ accOut[k] = vec_sub_16(
+ vec_add_16(accIn[k], columnA[k]),
+ vec_add_16(columnR0[k], columnR1[k]));
+ }
+
+ auto accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
+ &st->accumulator.psqtAccumulation[Perspective][0]);
+ auto accPsqtOut = reinterpret_cast<psqt_vec_t*>(
+ &states_to_update[0]->accumulator.psqtAccumulation[Perspective][0]);
+
+ const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0];
+ auto columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
+ const IndexType offsetPsqtA = PSQTBuckets * added[0][0];
+ auto columnPsqtA = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtA]);
+
+ if (removed[0].size() == 1)
+ {
+ for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k)
+ accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(
+ accPsqtIn[k], columnPsqtR0[k]), columnPsqtA[k]);
+ }
+ else
+ {
+ const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1];
+ auto columnPsqtR1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
+
+ for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k)
+ accPsqtOut[k] = vec_sub_psqt_32(
+ vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]),
+ vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k]));
+ }
+ }
+ else
+ {
+ for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+ {
+ // Load accumulator
+ auto accTileIn = reinterpret_cast<const vec_t*>(
+ &st->accumulator.accumulation[Perspective][j * TileHeight]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_load(&accTileIn[k]);
+
+ for (IndexType i = 0; states_to_update[i]; ++i)
+ {
+ // Difference calculation for the deactivated features
+ for (const auto index : removed[i])
+ {
+ const IndexType offset = HalfDimensions * index + j * TileHeight;
+ auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_sub_16(acc[k], column[k]);
+ }
+
+ // Difference calculation for the activated features
+ for (const auto index : added[i])
+ {
+ const IndexType offset = HalfDimensions * index + j * TileHeight;
+ auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_add_16(acc[k], column[k]);
+ }
+
+ // Store accumulator
+ auto accTileOut = reinterpret_cast<vec_t*>(
+ &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ vec_store(&accTileOut[k], acc[k]);