From a18a5157796d8db72b0ea1dfea137090bd8a52b9 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 12 Feb 2021 21:15:24 +0100 Subject: [PATCH] Optimize trigram reading somewhat. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Helps 1–2% CPU. --- database-builder.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/database-builder.cpp b/database-builder.cpp index 11a1a5a..76479aa 100644 --- a/database-builder.cpp +++ b/database-builder.cpp @@ -5,6 +5,9 @@ #include #include +#ifdef HAS_ENDIAN_H +#include <endian.h> +#endif #include #include #include @@ -26,20 +29,14 @@ constexpr unsigned num_overflow_slots = 16; string zstd_compress(const string &src, ZSTD_CDict *cdict, string *tempbuf); -static inline uint32_t read_unigram(const string_view s, size_t idx) -{ - if (idx < s.size()) { - return (unsigned char)s[idx]; - } else { - return 0; - } -} - +// NOTE: Will read one byte past the end of the trigram, but it's OK, +// since we always call it from contexts where there's a terminating zero byte. static inline uint32_t read_trigram(const string_view s, size_t start) { - return read_unigram(s, start) | - (read_unigram(s, start + 1) << 8) | - (read_unigram(s, start + 2) << 16); + uint32_t trgm; + memcpy(&trgm, s.data() + start, sizeof(trgm)); + trgm = le32toh(trgm); + return trgm & 0xffffff; } class PostingListBuilder { -- 2.39.2