From: Steinar H. Gunderson Date: Tue, 6 Oct 2020 20:20:00 +0000 (+0200) Subject: Hand-roll zeroing of destination docids for SSE2; takes us seemingly up from ~84... X-Git-Tag: 1.0.0~42 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=0bf819d19af7ae0f5b7255322658f5aaa2b14192;p=plocate Hand-roll zeroing of destination docids for SSE2; takes us seemingly up from ~84% to ~89% of reference. --- diff --git a/turbopfor.h b/turbopfor.h index e4c31e6..84e354d 100644 --- a/turbopfor.h +++ b/turbopfor.h @@ -507,7 +507,11 @@ const unsigned char *decode_pfor_bitmap_interleaved_128_32(const unsigned char * constexpr unsigned BlockSize = 128; using Docid = uint32_t; - memset(out, 0, BlockSize * sizeof(Docid)); + // Set all output values to zero, before the exceptions are filled in. + #pragma GCC unroll 4 + for (unsigned i = 0; i < BlockSize / 4; ++i) { + _mm_storeu_si128(reinterpret_cast<__m128i *>(out) + i, _mm_setzero_si128()); + } const unsigned bit_width = *in++ & 0x3f;