X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=database-builder.h;h=e2c0c19f54c291d19d99edd87c47b23c48b2ac50;hb=63fd24efd774f1ed3f5dab82b934f35b7b039557;hp=e799105563270690099bffcbc9f9b196d49beb6f;hpb=acce03bb32917e21f2d1d49c5a460d3cd2f72a11;p=plocate diff --git a/database-builder.h b/database-builder.h index e799105..e2c0c19 100644 --- a/database-builder.h +++ b/database-builder.h @@ -8,15 +8,36 @@ #include #include #include +#include #include #include class PostingListBuilder; +// {0,0} means unknown or so current that it should never match. +// {-1,0} means it's not a directory. +struct dir_time { + int64_t sec; + int32_t nsec; + + bool operator<(const dir_time &other) const + { + if (sec != other.sec) + return sec < other.sec; + return nsec < other.nsec; + } + bool operator>=(const dir_time &other) const + { + return !(other < *this); + } +}; +constexpr dir_time unknown_dir_time{ 0, 0 }; +constexpr dir_time not_a_dir{ -1, 0 }; + class DatabaseReceiver { public: virtual ~DatabaseReceiver() = default; - virtual void add_file(std::string filename) = 0; + virtual void add_file(std::string filename, dir_time dt) = 0; virtual void flush_block() = 0; virtual void finish() { flush_block(); } }; @@ -25,7 +46,7 @@ class DictionaryBuilder : public DatabaseReceiver { public: DictionaryBuilder(size_t blocks_to_keep, size_t block_size) : blocks_to_keep(blocks_to_keep), block_size(block_size) {} - void add_file(std::string filename) override; + void add_file(std::string filename, dir_time dt) override; void flush_block() override; std::string train(size_t buf_size); @@ -45,10 +66,10 @@ private: class Corpus : public DatabaseReceiver { public: - Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict); + Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times); ~Corpus(); - void add_file(std::string filename) override; + void add_file(std::string filename, dir_time dt) override; void flush_block() override; void finish() override; @@ -60,20 +81,30 @@ public: } PostingListBuilder &get_pl_builder(uint32_t trgm); size_t num_trigrams() const; + std::string get_compressed_dir_times(); private: + void compress_dir_times(size_t allowed_slop); + std::unique_ptr invindex; FILE *outfp; std::string current_block; std::string tempbuf; const size_t block_size; + const bool store_dir_times; ZSTD_CDict *cdict; + + ZSTD_CStream *dir_time_ctx = nullptr; + std::string dir_times; // Buffer of still-uncompressed data. + std::string dir_times_compressed; }; class DatabaseBuilder { public: - DatabaseBuilder(const char *outfile, int block_size, std::string dictionary); - Corpus *start_corpus(); + DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary); + Corpus *start_corpus(bool store_dir_times); + void set_next_dictionary(std::string next_dictionary); + void set_conf_block(std::string conf_block); void finish_corpus(); private: @@ -84,6 +115,7 @@ private: std::chrono::steady_clock::time_point corpus_start; Corpus *corpus = nullptr; ZSTD_CDict *cdict = nullptr; + std::string next_dictionary, conf_block; }; #endif // !defined(_DATABASE_BUILDER_H)