-// Corpus is a DatabaseReceiver implementation: it overrides add_file(),
-// flush_block() and finish(), and additionally exposes a table of
-// PostingListBuilder pointers indexed by trigram value (invindex).
-//
-// NOTE(review): the constructor, destructor and most member functions are
-// defined outside this declaration; the comments below state only what the
-// declaration itself shows and mark presumed intent as such.
-class Corpus : public DatabaseReceiver {
-public:
- // The parameter names mirror the members outfp, block_size, cdict and
- // store_dir_times; presumably each argument is stored in the member of the
- // same name -- confirm against the definition.
- Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times);
- ~Corpus();
-
- // The unique_ptr member already makes this class non-copyable (and the
- // user-declared destructor suppresses implicit moves); the deleted
- // operations just state that explicitly, since the class also holds raw
- // handles (outfp, cdict, dir_time_ctx) whose ownership is not visible here.
- Corpus(const Corpus &) = delete;
- Corpus &operator=(const Corpus &) = delete;
-
- // DatabaseReceiver interface.
- void add_file(std::string filename, dir_time dt) override;
- void flush_block() override;
- void finish() override;
-
- // Public state, readable and writable by callers.
- // NOTE(review): presumably one entry per flushed filename block -- confirm.
- std::vector<uint64_t> filename_blocks;
- size_t num_files = 0;
- size_t num_files_in_block = 0;
- size_t num_blocks = 0;
-
- // Returns true if the invindex slot for trgm holds a non-null builder.
- // Does not modify the corpus, hence const. trgm is used directly as an
- // array index with no bounds check here; the array size is set elsewhere.
- bool seen_trigram(uint32_t trgm) const
- {
-  return invindex[trgm] != nullptr;
- }
-
- // NOTE(review): presumably returns the builder for trgm, creating it on
- // first use -- confirm against the definition.
- PostingListBuilder &get_pl_builder(uint32_t trgm);
- size_t num_trigrams() const;
- std::string get_compressed_dir_times();
-
-private:
- void compress_dir_times(size_t allowed_slop);
-
- // Array of raw PostingListBuilder pointers indexed by trigram value; a
- // null entry means seen_trigram() reports false for that trigram.
- // The unique_ptr owns only the array itself, not the pointed-to builders.
- std::unique_ptr<PostingListBuilder *[]> invindex;
- FILE *outfp;
- std::string current_block;
- std::string tempbuf;
- const size_t block_size;
- const bool store_dir_times;
- ZSTD_CDict *cdict;
-
- // State used for directory times (see compress_dir_times() and
- // get_compressed_dir_times()).
- ZSTD_CStream *dir_time_ctx = nullptr;
- std::string dir_times;  // Buffer of still-uncompressed data.
- std::string dir_times_compressed;
-};