X-Git-Url: https://git.sesse.net/?p=plocate;a=blobdiff_plain;f=database-builder.h;h=731598b734f4340ab139c963198575ebd23a2e61;hp=95c94a0f8f29b42bfde691b11dbccb129de68280;hb=HEAD;hpb=3d0c863edc6eb65c0dc3a13d2745cab5ef0a6773 diff --git a/database-builder.h b/database-builder.h index 95c94a0..731598b 100644 --- a/database-builder.h +++ b/database-builder.h @@ -4,28 +4,54 @@ #include "db.h" #include +#include #include #include #include #include +#include +#include #include #include class PostingListBuilder; +// {0,0} means unknown or so current that it should never match. +// {-1,0} means it's not a directory. +struct dir_time { + int64_t sec; + int32_t nsec; + + bool operator<(const dir_time &other) const + { + if (sec != other.sec) + return sec < other.sec; + return nsec < other.nsec; + } + bool operator>=(const dir_time &other) const + { + return !(other < *this); + } +}; +constexpr dir_time unknown_dir_time{ 0, 0 }; +constexpr dir_time not_a_dir{ -1, 0 }; + class DatabaseReceiver { public: virtual ~DatabaseReceiver() = default; - virtual void add_file(std::string filename) = 0; + virtual void add_file(std::string filename, dir_time dt) = 0; virtual void flush_block() = 0; virtual void finish() { flush_block(); } + + // EncodingCorpus only. + virtual size_t num_files_seen() const { return -1; } }; class DictionaryBuilder : public DatabaseReceiver { public: DictionaryBuilder(size_t blocks_to_keep, size_t block_size) : blocks_to_keep(blocks_to_keep), block_size(block_size) {} - void add_file(std::string filename) override; + void add_file(std::string filename, dir_time dt) override; void flush_block() override; std::string train(size_t buf_size); @@ -43,46 +69,26 @@ private: std::vector lengths; }; -class Corpus : public DatabaseReceiver { -public: - Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict); - ~Corpus(); - - void add_file(std::string filename) override; - void flush_block() override; - void finish() override; - - std::vector filename_blocks; - size_t num_files = 0, num_files_in_block = 0, num_blocks = 0; - bool seen_trigram(uint32_t trgm) - { - return invindex[trgm] != nullptr; - } - PostingListBuilder &get_pl_builder(uint32_t trgm); - size_t num_trigrams() const; - -private: - std::unique_ptr invindex; - FILE *outfp; - std::string current_block; - std::string tempbuf; - const size_t block_size; - ZSTD_CDict *cdict; -}; +class EncodingCorpus; class DatabaseBuilder { public: - DatabaseBuilder(const char *outfile, int block_size, std::string dictionary); - Corpus *start_corpus(); + DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary, bool check_visibility); + DatabaseReceiver *start_corpus(bool store_dir_times); + void set_next_dictionary(std::string next_dictionary); + void set_conf_block(std::string conf_block); void finish_corpus(); private: FILE *outfp; + std::string outfile; + std::string temp_filename; Header hdr; const int block_size; std::chrono::steady_clock::time_point corpus_start; - Corpus *corpus = nullptr; + EncodingCorpus *corpus = nullptr; ZSTD_CDict *cdict = nullptr; + std::string next_dictionary, conf_block; }; #endif // !defined(_DATABASE_BUILDER_H)