git.sesse.net Git - plocate/commitdiff
Make the builder delta-encode posting lists as we go.
author: Steinar H. Gunderson <steinar+git@gunderson.no>
Tue, 6 Oct 2020 20:54:44 +0000 (22:54 +0200)
committer: Steinar H. Gunderson <steinar+git@gunderson.no>
Wed, 7 Oct 2020 22:44:35 +0000 (00:44 +0200)
It's slightly faster, and puts less complexity load on the encoder.

plocate-build.cpp

index 18d3bd90fde057792b5f6e14d914882ba26d007c..90141ffd1df2e14a026f66b242e552a788f28d99 100644 (file)
@@ -82,7 +82,7 @@ private:
        void write_header(uint32_t docid);
        void append_block();
 
-       vector<uint32_t> pending_docids;
+       vector<uint32_t> pending_deltas;
 
        uint32_t last_block_end, last_docid = -1;
 };
@@ -102,11 +102,11 @@ void PostingListBuilder::add_docid(uint32_t docid)
                return;
        }
 
+       pending_deltas.push_back(docid - last_docid - 1);
        last_docid = docid;
-       pending_docids.push_back(docid);
-       if (pending_docids.size() == 128) {
+       if (pending_deltas.size() == 128) {
                append_block();
-               pending_docids.clear();
+               pending_deltas.clear();
                last_block_end = docid;
        }
        ++num_docids;
@@ -114,7 +114,7 @@ void PostingListBuilder::add_docid(uint32_t docid)
 
 void PostingListBuilder::finish()
 {
-       if (pending_docids.empty()) {
+       if (pending_deltas.empty()) {
                return;
        }
 
@@ -122,15 +122,15 @@ void PostingListBuilder::finish()
 
        // No interleaving for partial blocks.
        unsigned char buf[P4NENC_BOUND(128)];
-       unsigned char *end = p4d1enc32(pending_docids.data(), pending_docids.size(), buf, last_block_end);
+       unsigned char *end = p4enc32(pending_deltas.data(), pending_deltas.size(), buf);
        encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
 }
 
 void PostingListBuilder::append_block()
 {
        unsigned char buf[P4NENC_BOUND(128)];
-       assert(pending_docids.size() == 128);
-       unsigned char *end = p4d1enc128v32(pending_docids.data(), 128, buf, last_block_end);
+       assert(pending_deltas.size() == 128);
+       unsigned char *end = p4enc128v32(pending_deltas.data(), 128, buf);
        encoded.append(reinterpret_cast<char *>(buf), reinterpret_cast<char *>(end));
 }