git.sesse.net Git - plocate/blobdiff - database-builder.h
Release plocate 1.1.22.
[plocate] / database-builder.h
index 97188e131e3c800b16585396c9d9cdae9e2493b5..731598b734f4340ab139c963198575ebd23a2e61 100644 (file)
@@ -4,10 +4,12 @@
 #include "db.h"
 
 #include <chrono>
+#include <fcntl.h>
 #include <memory>
 #include <random>
 #include <stddef.h>
 #include <string>
+#include <unistd.h>
 #include <utility>
 #include <vector>
 #include <zstd.h>
@@ -40,6 +42,9 @@ public:
        virtual void add_file(std::string filename, dir_time dt) = 0;
        virtual void flush_block() = 0;
        virtual void finish() { flush_block(); }
+
+       // EncodingCorpus only; this base default returns -1, i.e. the largest size_t value.
+       virtual size_t num_files_seen() const { return -1; }
 };
 
 class DictionaryBuilder : public DatabaseReceiver {
@@ -64,45 +69,12 @@ private:
        std::vector<size_t> lengths;
 };
 
-class Corpus : public DatabaseReceiver {
-public:
-       Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times);
-       ~Corpus();
-
-       void add_file(std::string filename, dir_time dt) override;
-       void flush_block() override;
-       void finish() override;
-
-       std::vector<uint64_t> filename_blocks;
-       size_t num_files = 0, num_files_in_block = 0, num_blocks = 0;
-       bool seen_trigram(uint32_t trgm)
-       {
-               return invindex[trgm] != nullptr;
-       }
-       PostingListBuilder &get_pl_builder(uint32_t trgm);
-       size_t num_trigrams() const;
-       std::string get_compressed_dir_times();
-
-private:
-       void compress_dir_times(size_t allowed_slop);
-
-       std::unique_ptr<PostingListBuilder *[]> invindex;
-       FILE *outfp;
-       std::string current_block;
-       std::string tempbuf;
-       const size_t block_size;
-       const bool store_dir_times;
-       ZSTD_CDict *cdict;
-
-       ZSTD_CStream *dir_time_ctx = nullptr;
-       std::string dir_times;  // Buffer of still-uncompressed data.
-       std::string dir_times_compressed;
-};
+class EncodingCorpus;
 
 class DatabaseBuilder {
 public:
        DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary, bool check_visibility);
-       Corpus *start_corpus(bool store_dir_times);
+       DatabaseReceiver *start_corpus(bool store_dir_times);
        void set_next_dictionary(std::string next_dictionary);
        void set_conf_block(std::string conf_block);
        void finish_corpus();
@@ -110,10 +82,11 @@ public:
 private:
        FILE *outfp;
        std::string outfile;
+       std::string temp_filename;
        Header hdr;
        const int block_size;
        std::chrono::steady_clock::time_point corpus_start;
-       Corpus *corpus = nullptr;
+       EncodingCorpus *corpus = nullptr;
        ZSTD_CDict *cdict = nullptr;
        std::string next_dictionary, conf_block;
 };