plocate made assumptions about the layout of the file that no longer
hold. Store the docid count in the header's former pad field to simplify things.
This requires a database rebuild, but only for short queries.
Normal queries will continue to work, so there's no version bump.
uint32_t version; // 0.
uint32_t hashtable_size;
uint32_t extra_ht_slots;
- uint32_t pad; // Unused.
+ uint32_t num_docids;
uint64_t hash_table_offset_bytes;
uint64_t filename_index_offset_bytes;
};
}
corpus.flush_block();
dprintf("Read %zu files from %s\n", corpus.num_files, infile);
+ hdr.num_docids = corpus.filename_blocks.size();
// Stick an empty block at the end as sentinel.
corpus.filename_blocks.push_back(ftell(outfp));
size_t Corpus::get_num_filename_blocks() const
{
- // The beginning of the filename blocks is the end of the filename index blocks.
- uint64_t end;
- complete_pread(fd, &end, sizeof(end), hdr.filename_index_offset_bytes);
-
- // Subtract the sentinel block.
- return (end - hdr.filename_index_offset_bytes) / sizeof(uint64_t) - 1;
+ return hdr.num_docids; // Count recorded in the header by the builder before the sentinel block is appended, so no subtraction is needed.
}
size_t scan_file_block(const vector<string> &needles, string_view compressed,