X-Git-Url: https://git.sesse.net/?p=stockfish;a=blobdiff_plain;f=src%2Fnnue%2Fnnue_feature_transformer.h;fp=src%2Fnnue%2Fnnue_feature_transformer.h;h=4f6a174a486667ea23acab3e3d99c4f1fe13d438;hp=0297b3233a1e42f032b557558788de528f960e35;hb=4766dfc3956f78d853c5e0c4636d6f90fd93df9a;hpb=b82d93ece484f833c994b40d9eddd959ba20ef92 diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 0297b323..4f6a174a 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -336,10 +336,17 @@ namespace Stockfish::Eval::NNUE { { const IndexType offset = HalfDimensions * p; const auto out = reinterpret_cast(&output[offset]); - for (IndexType j = 0; j < NumChunks; ++j) + + constexpr IndexType UnrollFactor = 16; + static_assert(UnrollFactor % UnrollFactor == 0); + for (IndexType j = 0; j < NumChunks; j += UnrollFactor) { - int16x8_t sum = reinterpret_cast(accumulation[perspectives[p]])[j]; - out[j] = vmax_s8(vqmovn_s16(sum), Zero); + int16x8_t sums[UnrollFactor]; + for (IndexType i = 0; i < UnrollFactor; ++i) + sums[i] = reinterpret_cast(accumulation[perspectives[p]])[j+i]; + + for (IndexType i = 0; i < UnrollFactor; ++i) + out[j+i] = vmax_s8(vqmovn_s16(sums[i]), Zero); } } return psqt;