#include <algorithm>
#include <assert.h>
+#ifdef HAS_ENDIAN_H
#include <endian.h>
+#endif
#include <stdint.h>
#include <string.h>
#include <strings.h>
+// This is a mess. :-/ Maybe it would be good just to drop support for
+// multiversioning; the only platform it really helps is 32-bit x86.
+// This may change if we decide to use AVX or similar in the future, though.
#if defined(__i386__) || defined(__x86_64__)
+#ifdef __SSE2__
+#define COULD_HAVE_SSE2
+#define SUPPRESS_DEFAULT
+#include <immintrin.h>
+#define TARGET_SSE2
+#elif defined(HAS_FUNCTION_MULTIVERSIONING)
#define COULD_HAVE_SSE2
#include <immintrin.h>
+#define TARGET_SSE2 __attribute__((target("sse2")))
#define TARGET_DEFAULT __attribute__((target("default")))
#else
+#define TARGET_DEFAULT
+#endif
+#else
// Function multiversioning is x86-only.
#define TARGET_DEFAULT
#endif
#define dprintf(...)
//#define dprintf(...) fprintf(stderr, __VA_ARGS__);
+#ifndef SUPPRESS_DEFAULT
// Forward declarations, so that the template code below knows these
// functions exist before their definitions appear.
// (These must seemingly be non-templates for function multiversioning to work.)
//
// TARGET_DEFAULT is applied exactly once per declaration: it expands either
// to nothing or to __attribute__((target("default"))), and repeating it would
// only duplicate that attribute on the same declaration.
TARGET_DEFAULT
const unsigned char *
decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out);
+#endif
#ifdef COULD_HAVE_SSE2
// Forward declarations of the SSE2 variants of the *_interleaved_128_32
// decoders. TARGET_SSE2 is empty when the whole translation unit is already
// compiled with SSE2 (__SSE2__ defined), and expands to
// __attribute__((target("sse2"))) when function multiversioning is used.
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out);
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out);
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out);
#endif
#ifdef COULD_HAVE_SSE2
struct InterleavedBitReaderSSE2 {
public:
- __attribute__((target("sse2")))
+ TARGET_SSE2
InterleavedBitReaderSSE2(const unsigned char *in, unsigned bits)
: in(reinterpret_cast<const __m128i *>(in)), bits(bits), mask(_mm_set1_epi32(mask_for_bits(bits))) {}
// Can read 16 bytes past the end of the input (if bit_width == 0).
- __attribute__((target("sse2")))
+ TARGET_SSE2
__m128i
read()
{
#ifdef COULD_HAVE_SSE2
class DeltaDecoderSSE2 {
public:
- __attribute__((target("sse2")))
+ TARGET_SSE2
DeltaDecoderSSE2(uint32_t prev_val)
: prev_val(_mm_set1_epi32(prev_val)) {}
- __attribute__((target("sse2")))
+ TARGET_SSE2
__m128i
decode(__m128i val)
{
};
template<unsigned BlockSize>
-__attribute__((target("sse2"))) inline void delta_decode_sse2(uint32_t *out)
+TARGET_SSE2 inline void delta_decode_sse2(uint32_t *out)
{
DeltaDecoderSSE2 delta(out[-1]);
__m128i *outvec = reinterpret_cast<__m128i *>(out);
// Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2).
template<unsigned BlockSize, bool OrWithExisting, bool DeltaDecode, unsigned bit_width>
-__attribute__((target("sse2")))
-const unsigned char *
+TARGET_SSE2 const unsigned char *
decode_bitmap_sse2_unrolled(const unsigned char *in, uint32_t *out)
{
__m128i *outvec = reinterpret_cast<__m128i *>(out);
// Can read 16 bytes past the end of its input (inherit from InterleavedBitReaderSSE2).
template<unsigned BlockSize, bool OrWithExisting, bool DeltaDecode>
-__attribute__((target("sse2")))
-const unsigned char *
+TARGET_SSE2 const unsigned char *
decode_bitmap_sse2(const unsigned char *in, unsigned bit_width, uint32_t *out)
{
switch (bit_width) {
}
}
+#ifndef SUPPRESS_DEFAULT
// Does not read past the end of the input.
TARGET_DEFAULT
const unsigned char *
{
return decode_for_interleaved_generic<128>(in, out);
}
+#endif
#ifdef COULD_HAVE_SSE2
// Specialized version for SSE2.
// Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()).
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_for_interleaved_128_32(const unsigned char *in, uint32_t *out)
{
}
}
+#ifndef SUPPRESS_DEFAULT
// Default-target version of the PFor-bitmap decoder entry point: it does no
// work of its own and simply forwards both arguments to the generic
// implementation instantiated with 128, returning whatever that returns.
TARGET_DEFAULT
const unsigned char *
decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out)
{
	const unsigned char *const next =
		decode_pfor_bitmap_interleaved_generic<128>(in, out);
	return next;
}
+#endif
#ifdef COULD_HAVE_SSE2
// Specialized version for SSE2.
//
// Can read 16 bytes past the end of the input (inherit from InterleavedBitReaderSSE2
// and decode_pfor_bitmap_exceptions()).
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_pfor_bitmap_interleaved_128_32(const unsigned char *in, uint32_t *out)
{
}
}
+#ifndef SUPPRESS_DEFAULT
// Default-target version of the PFor-VB decoder entry point: a thin wrapper
// that passes `in` and `out` straight through to the generic implementation
// instantiated with 128 and hands back its result unchanged.
TARGET_DEFAULT
const unsigned char *
decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out)
{
	const unsigned char *const next =
		decode_pfor_vb_interleaved_generic<128>(in, out);
	return next;
}
+#endif
#ifdef COULD_HAVE_SSE2
// Specialized version for SSE2.
// Can read 16 bytes past the end of the input (inherit from decode_bitmap_sse2()).
-__attribute__((target("sse2")))
+TARGET_SSE2
const unsigned char *
decode_pfor_vb_interleaved_128_32(const unsigned char *in, uint32_t *out)
{