X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=turbopfor.cpp;h=b7902a292d7fb494c2eabe0d4e0675b06fbde50c;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=2c7ef58c6589ad9d94e8ef4f50d69df58c78dd10;hpb=3659603fea2f498950008ad5b60f44edf11ffb43;p=plocate diff --git a/turbopfor.cpp b/turbopfor.cpp index 2c7ef58..b7902a2 100644 --- a/turbopfor.cpp +++ b/turbopfor.cpp @@ -1,15 +1,30 @@ #include #include +#ifdef HAS_ENDIAN_H #include +#endif #include #include #include +// This is a mess. :-/ Maybe it would be good just to drop support for +// multiversioning; the only platform it really helps is 32-bit x86. +// This may change if we decide to use AVX or similar in the future, though. #if defined(__i386__) || defined(__x86_64__) +#ifdef __SSE2__ +#define COULD_HAVE_SSE2 +#define SUPPRESS_DEFAULT +#include +#define TARGET_SSE2 +#elif defined(HAS_FUNCTION_MULTIVERSIONING) #define COULD_HAVE_SSE2 #include +#define TARGET_SSE2 __attribute__((target("sse2"))) #define TARGET_DEFAULT __attribute__((target("default"))) #else +#define TARGET_DEFAULT +#endif +#else // Function multiversioning is x86-only. #define TARGET_DEFAULT #endif @@ -19,6 +34,7 @@ #define dprintf(...) //#define dprintf(...) fprintf(stderr, __VA_ARGS__); +#ifndef SUPPRESS_DEFAULT // Forward declarations to declare to the template code below that they exist. // (These must seemingly be non-templates for function multiversioning to work.) TARGET_DEFAULT @@ -30,15 +46,16 @@ decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out); TARGET_DEFAULT const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out); +#endif #ifdef COULD_HAVE_SSE2 -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out); -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out); #endif @@ -173,12 +190,12 @@ private: #ifdef COULD_HAVE_SSE2 struct InterleavedBitReaderSSE2 { public: - __attribute__((target("sse2"))) + TARGET_SSE2 InterleavedBitReaderSSE2(const unsigned char *in, unsigned bits) : in(reinterpret_cast(in)), bits(bits), mask(_mm_set1_epi32(mask_for_bits(bits))) {} // Can read 16 bytes past the end of the input (if bit_width == 0). - __attribute__((target("sse2"))) + TARGET_SSE2 __m128i read() { @@ -247,11 +264,11 @@ const unsigned char *decode_for(const unsigned char *in, unsigned num, Docid *ou #ifdef COULD_HAVE_SSE2 class DeltaDecoderSSE2 { public: - __attribute__((target("sse2"))) + TARGET_SSE2 DeltaDecoderSSE2(uint32_t prev_val) : prev_val(_mm_set1_epi32(prev_val)) {} - __attribute__((target("sse2"))) + TARGET_SSE2 __m128i decode(__m128i val) { @@ -271,7 +288,7 @@ private: }; template -__attribute__((target("sse2"))) inline void delta_decode_sse2(uint32_t *out) +TARGET_SSE2 inline void delta_decode_sse2(uint32_t *out) { DeltaDecoderSSE2 delta(out[-1]); __m128i *outvec = reinterpret_cast<__m128i *>(out); @@ -283,8 +300,7 @@ __attribute__((target("sse2"))) inline void delta_decode_sse2(uint32_t *out) // Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2). template -__attribute__((target("sse2"))) -const unsigned char * +TARGET_SSE2 const unsigned char * decode_bitmap_sse2_unrolled(const unsigned char *in, uint32_t *out) { __m128i *outvec = reinterpret_cast<__m128i *>(out); @@ -307,8 +323,7 @@ decode_bitmap_sse2_unrolled(const unsigned char *in, uint32_t *out) // Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2). template -__attribute__((target("sse2"))) -const unsigned char * +TARGET_SSE2 const unsigned char * decode_bitmap_sse2(const unsigned char *in, unsigned bit_width, uint32_t *out) { switch (bit_width) { @@ -420,6 +435,7 @@ const unsigned char *decode_for_interleaved(const unsigned char *in, Docid *out) } } +#ifndef SUPPRESS_DEFAULT // Does not read past the end of the input. TARGET_DEFAULT const unsigned char * @@ -427,11 +443,12 @@ decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_for_interleaved_generic<128>(in, out); } +#endif #ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out) { @@ -543,19 +560,21 @@ const unsigned char *decode_pfor_bitmap_interleaved(const unsigned char *in, Doc } } +#ifndef SUPPRESS_DEFAULT TARGET_DEFAULT const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_pfor_bitmap_interleaved_generic<128>(in, out); } +#endif #ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // // Can read 16 bytes past the end of the input (inherit from InterleavedBitReaderSSE2 // and decode_pfor_bitmap_exceptions()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out) { @@ -693,17 +712,19 @@ const unsigned char *decode_pfor_vb_interleaved(const unsigned char *in, Docid * } } +#ifndef SUPPRESS_DEFAULT TARGET_DEFAULT const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out) { return decode_pfor_vb_interleaved_generic<128>(in, out); } +#endif #ifdef COULD_HAVE_SSE2 // Specialized version for SSE2. // Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()). -__attribute__((target("sse2"))) +TARGET_SSE2 const unsigned char * decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out) {