X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=parse_trigrams.h;h=c845cdeab93cec0f0803b01d01cda232e422b546;hb=a2a3c6f0a7037c012839c5b24523b9f9e3f6f195;hp=810d005653fcf7ab3906f4ddc006670f3e99a4ec;hpb=efd7545c8ee2177aa13cf3ec8423d0e725c6a16d;p=plocate

diff --git a/parse_trigrams.h b/parse_trigrams.h
index 810d005..c845cde 100644
--- a/parse_trigrams.h
+++ b/parse_trigrams.h
@@ -11,7 +11,7 @@
 // One or more trigrams, with an implicit OR between them. For case-sensitive searches,
 // this is just e.g. “abc”, but for case-insensitive, it would be “abc OR abC or aBc ...” etc.
 struct TrigramDisjunction {
-	unsigned index; // For debugging only.
+	unsigned index = -1; // For debugging only.
 
 	// The alternatives as determined by parse_trigrams().
 	std::vector<uint32_t> trigram_alternatives;
@@ -53,6 +53,19 @@ struct TrigramDisjunction {
 // getting their own trigram).
 void parse_trigrams(const std::string &needle, bool ignore_case, std::vector<TrigramDisjunction> *trigram_groups);
 
+static constexpr uint32_t WILDCARD_UNIGRAM = 0xFF000000;
+static constexpr uint32_t PREMATURE_END_UNIGRAM = 0xFF000001;
+
+// Reads a unigram, taking into account escaping (\<foo> becomes <foo>).
+// Returns WILDCARD_UNIGRAM if there's an invalid unigram, i.e., we found
+// a glob character (?, * or a [] group). Returns PREMATURE_END_UNIGRAM if we went
+// past the end of the string, e.g., a string that ends in a backslash.
+// The second element is always the length.
+std::pair<uint32_t, size_t> read_unigram(const std::string &s, size_t start);
+
+// Reads a trigram, i.e., three calls to read_unigram(). Needs to start on a valid unigram.
+// Returns WILDCARD_UNIGRAM or PREMATURE_END_UNIGRAM if either of those occurred
+// during reading of the string.
 uint32_t read_trigram(const std::string &s, size_t start);
 
 // For debugging.