- while (ptr < current_block.c_str() + current_block.size()) {
- string_view s(ptr);
- if (s.size() >= 3) {
- for (size_t j = 0; j < s.size() - 2; ++j) {
- uint32_t trgm = read_trigram(s, j);
- add_docid(trgm, docid);
+ const char *end = ptr + current_block.size();
+ while (ptr < end - 3) { // Must be at least one filename left, that's at least three bytes.
+ if (ptr[0] == '\0') {
+ // This filename is zero bytes, so skip it (and the zero terminator).
+ ++ptr;
+ continue;
+ } else if (ptr[1] == '\0') {
+ // This filename is one byte, so skip it (and the zero terminator).
+ ptr += 2;
+ continue;
+ } else if (ptr[2] == '\0') {
+ // This filename is two bytes, so skip it (and the zero terminator).
+ ptr += 3;
+ continue;
+ }
+ for (;;) {
+ // NOTE: Will read one byte past the end of the trigram, but it's OK,
+ // since we always call it from contexts where there's a terminating zero byte.
+ uint32_t trgm;
+ memcpy(&trgm, ptr, sizeof(trgm));
+ ++ptr;
+ trgm = le32toh(trgm);
+ add_docid(trgm & 0xffffff, docid);
+ if (trgm <= 0xffffff) {
+ // Terminating zero byte, so we're done with this filename.
+ // Skip the remaining two bytes, and the zero terminator.
+ ptr += 3;
+ break;