From: Steinar H. Gunderson Date: Fri, 12 Feb 2021 20:15:24 +0000 (+0100) Subject: Optimize trigram reading somewhat. X-Git-Tag: 1.1.4~6 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=a18a5157796d8db72b0ea1dfea137090bd8a52b9;p=plocate Optimize trigram reading somewhat. Helps 1–2% CPU. --- diff --git a/database-builder.cpp b/database-builder.cpp index 11a1a5a..76479aa 100644 --- a/database-builder.cpp +++ b/database-builder.cpp @@ -5,6 +5,9 @@ #include #include +#ifdef HAS_ENDIAN_H +#include <endian.h> +#endif #include #include #include @@ -26,20 +29,14 @@ constexpr unsigned num_overflow_slots = 16; string zstd_compress(const string &src, ZSTD_CDict *cdict, string *tempbuf); -static inline uint32_t read_unigram(const string_view s, size_t idx) -{ - if (idx < s.size()) { - return (unsigned char)s[idx]; - } else { - return 0; - } -} - +// NOTE: Will read one byte past the end of the trigram, but it's OK, +// since we always call it from contexts where there's a terminating zero byte. static inline uint32_t read_trigram(const string_view s, size_t start) { - return read_unigram(s, start) | - (read_unigram(s, start + 1) << 8) | - (read_unigram(s, start + 2) << 16); + uint32_t trgm; + memcpy(&trgm, s.data() + start, sizeof(trgm)); + trgm = le32toh(trgm); + return trgm & 0xffffff; } class PostingListBuilder {