X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=parse_trigrams.h;h=2387c9078c3bd6c0dded9bd8d940289ecee37aa1;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=810d005653fcf7ab3906f4ddc006670f3e99a4ec;hpb=efd7545c8ee2177aa13cf3ec8423d0e725c6a16d;p=plocate diff --git a/parse_trigrams.h b/parse_trigrams.h index 810d005..2387c90 100644 --- a/parse_trigrams.h +++ b/parse_trigrams.h @@ -11,7 +11,7 @@ // One or more trigrams, with an implicit OR between them. For case-sensitive searches, // this is just e.g. “abc”, but for case-insensitive, it would be “abc OR abC or aBc ...” etc. struct TrigramDisjunction { - unsigned index; // For debugging only. + unsigned index = -1; // For debugging only. // The alternatives as determined by parse_trigrams(). std::vector trigram_alternatives; @@ -53,6 +53,19 @@ struct TrigramDisjunction { // getting their own trigram). void parse_trigrams(const std::string &needle, bool ignore_case, std::vector *trigram_groups); +static constexpr uint32_t WILDCARD_UNIGRAM = 0xFF000000; +static constexpr uint32_t PREMATURE_END_UNIGRAM = 0xFF000001; + +// Reads a unigram, taking into account escaping (\<foo> becomes <foo>). +// Returns WILDCARD_UNIGRAM if there's an invalid unigram, i.e., we found +// a glob character (?, * or a [] group). Returns PREMATURE_END_UNIGRAM if we went +// past the end of the string, e.g., a string that ends in a backslash. +// The second element is always the length. +std::pair read_unigram(const std::string &s, size_t start); + +// Reads a trigram, i.e., three calls to read_unigram(). Needs to start on a valid unigram. +// Returns WILDCARD_UNIGRAM or PREMATURE_END_UNIGRAM if either of those occurred +// during reading of the string. uint32_t read_trigram(const std::string &s, size_t start); // For debugging.