git.sesse.net Git - plocate/commitdiff
Fix searching for very short (1 or 2 bytes) queries.
author: Steinar H. Gunderson <steinar+git@gunderson.no>
Sat, 3 Oct 2020 08:49:10 +0000 (10:49 +0200)
committer: Steinar H. Gunderson <steinar+git@gunderson.no>
Sat, 3 Oct 2020 08:49:10 +0000 (10:49 +0200)
plocate had assumptions about the layout of the file that no longer
held. Use the formerly unused pad header field to store the number of
docids, which simplifies things.

This requires a database rebuild, but only for short queries.
Normal queries will continue to work, so there's no version bump.

db.h
plocate-build.cpp
plocate.cpp

diff --git a/db.h b/db.h
index cb0a7a8d332403b9aeb2aafa151505a6afc19a9b..ca8b3ea74ae513a16a6bb9ed42a4eccdd9ea40a0 100644 (file)
--- a/db.h
+++ b/db.h
@@ -8,7 +8,7 @@ struct Header {
        uint32_t version;  // 0.
        uint32_t hashtable_size;
        uint32_t extra_ht_slots;
-       uint32_t pad;   // Unused.
+       uint32_t num_docids;
        uint64_t hash_table_offset_bytes;
        uint64_t filename_index_offset_bytes;
 };
diff --git a/plocate-build.cpp b/plocate-build.cpp
index 658aea2c63f6784f6afe2286441f9943b458934e..c6804497248f4a9a828200d784b9450f8986ad72 100644 (file)
--- a/plocate-build.cpp
+++ b/plocate-build.cpp
@@ -377,6 +377,7 @@ void do_build(const char *infile, const char *outfile, int block_size)
        }
        corpus.flush_block();
        dprintf("Read %zu files from %s\n", corpus.num_files, infile);
+       hdr.num_docids = corpus.filename_blocks.size();
 
        // Stick an empty block at the end as sentinel.
        corpus.filename_blocks.push_back(ftell(outfp));
diff --git a/plocate.cpp b/plocate.cpp
index fa70f76c6531f97f3d574217b95b71e510c8126f..fc8c4becdbde9f77f8490eef8a8e953426206b9b 100644 (file)
--- a/plocate.cpp
+++ b/plocate.cpp
@@ -188,12 +188,7 @@ void Corpus::get_compressed_filename_block(uint32_t docid, function<void(string)
 
 size_t Corpus::get_num_filename_blocks() const
 {
-       // The beginning of the filename blocks is the end of the filename index blocks.
-       uint64_t end;
-       complete_pread(fd, &end, sizeof(end), hdr.filename_index_offset_bytes);
-
-       // Subtract the sentinel block.
-       return (end - hdr.filename_index_offset_bytes) / sizeof(uint64_t) - 1;
+       return hdr.num_docids;
 }
 
 size_t scan_file_block(const vector<string> &needles, string_view compressed,