X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=turbopfor.cpp;h=b7902a292d7fb494c2eabe0d4e0675b06fbde50c;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=8a91c33144d2843cf2d15d3277407bcb54066110;hpb=2fcf8490a3a7a2e1d434f33383b5d33a0bc3ac03;p=plocate diff --git a/turbopfor.cpp b/turbopfor.cpp index 8a91c33..b7902a2 100644 --- a/turbopfor.cpp +++ b/turbopfor.cpp @@ -1,13 +1,32 @@ #include #include +#ifdef HAS_ENDIAN_H #include +#endif #include #include #include +// This is a mess. :-/ Maybe it would be good just to drop support for +// multiversioning; the only platform it really helps is 32-bit x86. +// This may change if we decide to use AVX or similar in the future, though. #if defined(__i386__) || defined(__x86_64__) +#ifdef __SSE2__ +#define COULD_HAVE_SSE2 +#define SUPPRESS_DEFAULT +#include +#define TARGET_SSE2 +#elif defined(HAS_FUNCTION_MULTIVERSIONING) #define COULD_HAVE_SSE2 #include +#define TARGET_SSE2 __attribute__((target("sse2"))) +#define TARGET_DEFAULT __attribute__((target("default"))) +#else +#define TARGET_DEFAULT +#endif +#else +// Function multiversioning is x86-only. +#define TARGET_DEFAULT #endif #include "turbopfor-common.h" @@ -15,26 +34,28 @@ #define dprintf(...) //#define dprintf(...) fprintf(stderr, __VA_ARGS__); +#ifndef SUPPRESS_DEFAULT // Forward declarations to declare to the template code below that they exist. // (These must seemingly be non-templates for function multiversioning to work.) -__attribute__((target("default"))) +TARGET_DEFAULT const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("default"))) +TARGET_DEFAULT const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("default"))) +TARGET_DEFAULT const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out); +#endif #ifdef COULD_HAVE_SSE2 -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out); #endif @@ -74,6 +95,12 @@ const unsigned char *read_baseval(const unsigned char *in, Docid *out) (uint32_t(in[2]) << 8) | (uint32_t(in[1]))) & 0x1fffff; return in + 3; + } else if (*in < 240) { + *out = ((uint32_t(in[0]) << 24) | + (uint32_t(in[1]) << 16) | + (uint32_t(in[2]) << 8) | + (uint32_t(in[3]))) & 0xfffffff; + return in + 4; } else { assert(false); // Not implemented. } @@ -163,12 +190,12 @@ private: #ifdef COULD_HAVE_SSE2 struct InterleavedBitReaderSSE2 { public: - __attribute__((target("sse2"))) + TARGET_SSE2 InterleavedBitReaderSSE2(const unsigned char *in, unsigned bits) : in(reinterpret_cast(in)), bits(bits), mask(_mm_set1_epi32(mask_for_bits(bits))) {} // Can read 16 bytes past the end of the input (if bit_width == 0). - __attribute__((target("sse2"))) + TARGET_SSE2 __m128i read() { @@ -237,11 +264,11 @@ const unsigned char *decode_for(const unsigned char *in, unsigned num, Docid *ou #ifdef COULD_HAVE_SSE2 class DeltaDecoderSSE2 { public: - __attribute__((target("sse2"))) + TARGET_SSE2 DeltaDecoderSSE2(uint32_t prev_val) : prev_val(_mm_set1_epi32(prev_val)) {} - __attribute__((target("sse2"))) + TARGET_SSE2 __m128i decode(__m128i val) { @@ -261,7 +288,7 @@ private: }; template -__attribute__((target("sse2"))) inline void delta_decode_sse2(uint32_t *out) +TARGET_SSE2 inline void delta_decode_sse2(uint32_t *out) { DeltaDecoderSSE2 delta(out[-1]); __m128i *outvec = reinterpret_cast<__m128i *>(out); @@ -273,8 +300,7 @@ __attribute__((target("sse2"))) inline void delta_decode_sse2(uint32_t *out) // Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2). template -__attribute__((target("sse2"))) -const unsigned char * +TARGET_SSE2 const unsigned char * decode_bitmap_sse2_unrolled(const unsigned char *in, uint32_t *out) { __m128i *outvec = reinterpret_cast<__m128i *>(out); @@ -297,8 +323,7 @@ decode_bitmap_sse2_unrolled(const unsigned char *in, uint32_t *out) // Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2). template -__attribute__((target("sse2"))) -const unsigned char * +TARGET_SSE2 const unsigned char * decode_bitmap_sse2(const unsigned char *in, unsigned bit_width, uint32_t *out) { switch (bit_width) { @@ -410,18 +435,20 @@ const unsigned char *decode_for_interleaved(const unsigned char *in, Docid *out) } } +#ifndef SUPPRESS_DEFAULT // Does not read past the end of the input. -__attribute__((target("default"))) +TARGET_DEFAULT const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_for_interleaved_generic<128>(in, out); } +#endif #ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out) { @@ -533,19 +560,21 @@ const unsigned char *decode_pfor_bitmap_interleaved(const unsigned char *in, Doc } } -__attribute__((target("default"))) +#ifndef SUPPRESS_DEFAULT +TARGET_DEFAULT const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_pfor_bitmap_interleaved_generic<128>(in, out); } +#endif #ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // // Can read 16 bytes past the end of the input (inherit from InterleavedBitReaderSSE2 // and decode_pfor_bitmap_exceptions()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out) { @@ -683,16 +712,19 @@ const unsigned char *decode_pfor_vb_interleaved(const unsigned char *in, Docid * } } -__attribute__((target("default"))) +#ifndef SUPPRESS_DEFAULT +TARGET_DEFAULT const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_pfor_vb_interleaved_generic<128>(in, out); } +#endif +#ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out) { @@ -729,6 +761,7 @@ decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out) return in; } +#endif // Can read 16 bytes past the end of the input (inherit from several functions). template