From 4b28e9fde29d4bc8857e46abe1d9067507c7fc20 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Tue, 6 Oct 2020 20:39:48 +0200
Subject: [PATCH] Convert the SSE2 delta decoder state into a class.

---
Reviewer note (not part of the commit; text in this area is ignored when the
patch is applied): the line structure of this patch was reconstructed from a
copy in which all newlines and indentation had been collapsed.
  * Indentation is assumed to be tabs -- TODO confirm against turbopfor.h;
    if context lines do not match, try `git am --ignore-whitespace`.
  * The template parameter list on delta_decode_sse2 was missing in that
    copy and is restored as `template<unsigned BlockSize>`, inferred from the
    `BlockSize / 4` loop bound -- TODO confirm against turbopfor.h.
  * The sender's e-mail address was also missing and has not been guessed.
What the change does: the running state of the SSE2 delta decoder (the
broadcast previous value and the constant 4/3/2/1 delta vector) moves out of
delta_decode_sse2() into a small DeltaDecoderSSE2 class with a decode()
method; the function now just loads, decodes and stores each 4-lane vector.

 turbopfor.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/turbopfor.h b/turbopfor.h
index ef5c831..70b7438 100644
--- a/turbopfor.h
+++ b/turbopfor.h
@@ -209,23 +209,34 @@ const unsigned char *decode_for(const unsigned char *in, unsigned num, Docid *ou
 }
 
 #ifdef COULD_HAVE_SSE2
+class DeltaDecoderSSE2 {
+public:
+	DeltaDecoderSSE2(uint32_t prev_val) : prev_val(_mm_set1_epi32(prev_val)) {}
+	__m128i decode(__m128i val) {
+		val = _mm_add_epi32(val, _mm_slli_si128(val, 4));
+		val = _mm_add_epi32(val, _mm_slli_si128(val, 8));
+		val = _mm_add_epi32(val, _mm_add_epi32(prev_val, delta));
+		prev_val = _mm_shuffle_epi32(val, _MM_SHUFFLE(3, 3, 3, 3));
+		return val;
+	}
+
+private:
+	// Use 4/3/2/1 as delta instead of fixed 1, so that we can do the prev_val + delta
+	// in parallel with something else.
+	const __m128i delta = _mm_set_epi32(4, 3, 2, 1);
+
+	__m128i prev_val;
+};
+
 template<unsigned BlockSize>
 __attribute__((target("sse2")))
 inline void delta_decode_sse2(uint32_t *out)
 {
-	// Use 4/3/2/1 as delta instead of fixed 1, so that we can do the prev_val + delta
-	// in parallel with something else.
-	const __m128i delta = _mm_set_epi32(4, 3, 2, 1);
-	__m128i prev_val = _mm_set1_epi32(out[-1]);
+	DeltaDecoderSSE2 delta(out[-1]);
 	__m128i *outvec = reinterpret_cast<__m128i *>(out);
 	for (unsigned i = 0; i < BlockSize / 4; ++i) {
 		__m128i val = _mm_loadu_si128(outvec + i);
-		val = _mm_add_epi32(val, _mm_slli_si128(val, 4));
-		val = _mm_add_epi32(val, _mm_slli_si128(val, 8));
-		val = _mm_add_epi32(val, _mm_add_epi32(prev_val, delta));
-		_mm_storeu_si128(outvec + i, val);
-
-		prev_val = _mm_shuffle_epi32(val, _MM_SHUFFLE(3, 3, 3, 3));
+		_mm_storeu_si128(outvec + i, delta.decode(val));
 	}
 }
 
-- 
2.39.2