X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=database-builder.h;h=e2c0c19f54c291d19d99edd87c47b23c48b2ac50;hb=8a5857967a61958269e7f58a93913ab7fdab7bb8;hp=95c94a0f8f29b42bfde691b11dbccb129de68280;hpb=3d0c863edc6eb65c0dc3a13d2745cab5ef0a6773;p=plocate diff --git a/database-builder.h b/database-builder.h index 95c94a0..e2c0c19 100644 --- a/database-builder.h +++ b/database-builder.h @@ -8,15 +8,36 @@ #include #include #include +#include #include #include class PostingListBuilder; +// {0,0} means unknown or so current that it should never match. +// {-1,0} means it's not a directory. +struct dir_time { + int64_t sec; + int32_t nsec; + + bool operator<(const dir_time &other) const + { + if (sec != other.sec) + return sec < other.sec; + return nsec < other.nsec; + } + bool operator>=(const dir_time &other) const + { + return !(other < *this); + } +}; +constexpr dir_time unknown_dir_time{ 0, 0 }; +constexpr dir_time not_a_dir{ -1, 0 }; + class DatabaseReceiver { public: virtual ~DatabaseReceiver() = default; - virtual void add_file(std::string filename) = 0; + virtual void add_file(std::string filename, dir_time dt) = 0; virtual void flush_block() = 0; virtual void finish() { flush_block(); } }; @@ -25,7 +46,7 @@ class DictionaryBuilder : public DatabaseReceiver { public: DictionaryBuilder(size_t blocks_to_keep, size_t block_size) : blocks_to_keep(blocks_to_keep), block_size(block_size) {} - void add_file(std::string filename) override; + void add_file(std::string filename, dir_time dt) override; void flush_block() override; std::string train(size_t buf_size); @@ -45,10 +66,10 @@ private: class Corpus : public DatabaseReceiver { public: - Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict); + Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times); ~Corpus(); - void add_file(std::string filename) override; + void add_file(std::string filename, dir_time dt) override; void flush_block() override; void finish() override; @@ -60,29 +81,41 @@ public: } PostingListBuilder &get_pl_builder(uint32_t trgm); size_t num_trigrams() const; + std::string get_compressed_dir_times(); private: + void compress_dir_times(size_t allowed_slop); + std::unique_ptr invindex; FILE *outfp; std::string current_block; std::string tempbuf; const size_t block_size; + const bool store_dir_times; ZSTD_CDict *cdict; + + ZSTD_CStream *dir_time_ctx = nullptr; + std::string dir_times; // Buffer of still-uncompressed data. + std::string dir_times_compressed; }; class DatabaseBuilder { public: - DatabaseBuilder(const char *outfile, int block_size, std::string dictionary); - Corpus *start_corpus(); + DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary); + Corpus *start_corpus(bool store_dir_times); + void set_next_dictionary(std::string next_dictionary); + void set_conf_block(std::string conf_block); void finish_corpus(); private: FILE *outfp; + std::string outfile; Header hdr; const int block_size; std::chrono::steady_clock::time_point corpus_start; Corpus *corpus = nullptr; ZSTD_CDict *cdict = nullptr; + std::string next_dictionary, conf_block; }; #endif // !defined(_DATABASE_BUILDER_H)