X-Git-Url: https://git.sesse.net/?p=plocate;a=blobdiff_plain;f=database-builder.h;h=731598b734f4340ab139c963198575ebd23a2e61;hp=97188e131e3c800b16585396c9d9cdae9e2493b5;hb=HEAD;hpb=d0f2469aedf852ba2d6949e59bfc1fff565960c9 diff --git a/database-builder.h b/database-builder.h index 97188e1..731598b 100644 --- a/database-builder.h +++ b/database-builder.h @@ -4,10 +4,12 @@ #include "db.h" #include +#include #include #include #include #include +#include #include #include #include @@ -40,6 +42,9 @@ public: virtual void add_file(std::string filename, dir_time dt) = 0; virtual void flush_block() = 0; virtual void finish() { flush_block(); } + + // EncodingCorpus only. + virtual size_t num_files_seen() const { return -1; } }; class DictionaryBuilder : public DatabaseReceiver { @@ -64,45 +69,12 @@ private: std::vector lengths; }; -class Corpus : public DatabaseReceiver { -public: - Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times); - ~Corpus(); - - void add_file(std::string filename, dir_time dt) override; - void flush_block() override; - void finish() override; - - std::vector filename_blocks; - size_t num_files = 0, num_files_in_block = 0, num_blocks = 0; - bool seen_trigram(uint32_t trgm) - { - return invindex[trgm] != nullptr; - } - PostingListBuilder &get_pl_builder(uint32_t trgm); - size_t num_trigrams() const; - std::string get_compressed_dir_times(); - -private: - void compress_dir_times(size_t allowed_slop); - - std::unique_ptr invindex; - FILE *outfp; - std::string current_block; - std::string tempbuf; - const size_t block_size; - const bool store_dir_times; - ZSTD_CDict *cdict; - - ZSTD_CStream *dir_time_ctx = nullptr; - std::string dir_times; // Buffer of still-uncompressed data. - std::string dir_times_compressed; -}; +class EncodingCorpus; class DatabaseBuilder { public: DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary, bool check_visibility); - Corpus *start_corpus(bool store_dir_times); + DatabaseReceiver *start_corpus(bool store_dir_times); void set_next_dictionary(std::string next_dictionary); void set_conf_block(std::string conf_block); void finish_corpus(); @@ -110,10 +82,11 @@ public: private: FILE *outfp; std::string outfile; + std::string temp_filename; Header hdr; const int block_size; std::chrono::steady_clock::time_point corpus_start; - Corpus *corpus = nullptr; + EncodingCorpus *corpus = nullptr; ZSTD_CDict *cdict = nullptr; std::string next_dictionary, conf_block; };