]> git.sesse.net Git - plocate/commitdiff
Streamline the add_docid() path by removing a redundant comparison. Saves ~1% CPU.
authorSteinar H. Gunderson <steinar+git@gunderson.no>
Thu, 11 Feb 2021 08:45:30 +0000 (09:45 +0100)
committerSteinar H. Gunderson <steinar+git@gunderson.no>
Thu, 11 Feb 2021 08:45:30 +0000 (09:45 +0100)
database-builder.cpp

index d883451dd669e1b2bae6a444bb2c02987d02e13b..439e25a59e88980ef6a2f8f7bb9d38c43f7134ec 100644 (file)
@@ -45,6 +45,7 @@ static inline uint32_t read_trigram(const string_view s, size_t start)
 class PostingListBuilder {
 public:
        inline void add_docid(uint32_t docid);
+       inline void add_first_docid(uint32_t docid);
        void finish();
 
        string encoded;
@@ -66,13 +67,7 @@ void PostingListBuilder::add_docid(uint32_t docid)
                return;
        }
 
-       if (num_docids == 0) {
-               // Very first docid.
-               write_header(docid);
-               ++num_docids;
-               last_docid = docid;
-               return;
-       }
+       assert(num_docids != 0);
 
        pending_deltas.push_back(docid - last_docid - 1);
        last_docid = docid;
@@ -83,6 +78,13 @@ void PostingListBuilder::add_docid(uint32_t docid)
        ++num_docids;
 }
 
+void PostingListBuilder::add_first_docid(uint32_t docid)
+{
+       write_header(docid);
+       ++num_docids;
+       last_docid = docid;
+}
+
 void PostingListBuilder::finish()
 {
        if (pending_deltas.empty()) {
@@ -191,11 +193,18 @@ public:
        }
        size_t num_files_seen() const override { return num_files; }
        PostingListBuilder &get_pl_builder(uint32_t trgm)
+       {
+               return *invindex[trgm];
+       }
+
+       void add_docid(uint32_t trgm, uint32_t docid)
        {
                if (invindex[trgm] == nullptr) {
                        invindex[trgm] = new PostingListBuilder;
+                       invindex[trgm]->add_first_docid(docid);
+               } else {
+                       invindex[trgm]->add_docid(docid);
                }
-               return *invindex[trgm];
        }
 
        size_t num_trigrams() const;
@@ -306,7 +315,7 @@ void EncodingCorpus::flush_block()
                if (s.size() >= 3) {
                        for (size_t j = 0; j < s.size() - 2; ++j) {
                                uint32_t trgm = read_trigram(s, j);
-                               get_pl_builder(trgm).add_docid(docid);
+                               add_docid(trgm, docid);
                        }
                }
                ptr += s.size() + 1;