- // Difference calculation for the deactivated features
- for (const auto index : removed[i])
- {
- const IndexType offset = HalfDimensions * index + j * TileHeight;
- auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
- for (IndexType k = 0; k < NumRegs; ++k)
- acc[k] = vec_sub_16(acc[k], column[k]);
- }
+ for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k)
+ accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(
+ accPsqtIn[k], columnPsqtR0[k]), columnPsqtA[k]);
+ }
+ else
+ {
+ const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1];
+ auto columnPsqtR1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR1]);
+
+ for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k)
+ accPsqtOut[k] = vec_sub_psqt_32(
+ vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]),
+ vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k]));
+ }
+ }
+ else
+ {
+ for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+ {
+ // Load accumulator
+ auto accTileIn = reinterpret_cast<const vec_t*>(
+ &st->accumulator.accumulation[Perspective][j * TileHeight]);
+ for (IndexType k = 0; k < NumRegs; ++k)
+ acc[k] = vec_load(&accTileIn[k]);