+ if (start >= s.size()) {
+ return { PREMATURE_END_UNIGRAM, 0 };
+ }
+ if (s[start] == '\\') {
+ // Escaped character.
+ if (start + 1 >= s.size()) {
+ return { PREMATURE_END_UNIGRAM, 1 };
+ } else {
+ return { (unsigned char)s[start + 1], 2 };
+ }
+ }
+ if (s[start] == '*' || s[start] == '?') {
+ // Wildcard.
+ return { WILDCARD_UNIGRAM, 1 };
+ }
+ if (s[start] == '[') {
+ // Character class; search to find the end.
+ size_t len = 1;
+ if (start + len >= s.size()) {
+ return { PREMATURE_END_UNIGRAM, len };
+ }
+ if (s[start + len] == '!') {
+ ++len;
+ }
+ if (start + len >= s.size()) {
+ return { PREMATURE_END_UNIGRAM, len };
+ }
+ if (s[start + len] == ']') {
+ ++len;
+ }
+ for (;;) {
+ if (start + len >= s.size()) {
+ return { PREMATURE_END_UNIGRAM, len };
+ }
+ if (s[start + len] == ']') {
+ return { WILDCARD_UNIGRAM, len + 1 };
+ }
+ ++len;
+ }