]> git.sesse.net Git - plocate/blobdiff - database-builder.h
Release plocate 1.1.22.
[plocate] / database-builder.h
index 95c94a0f8f29b42bfde691b11dbccb129de68280..731598b734f4340ab139c963198575ebd23a2e61 100644 (file)
@@ -4,28 +4,54 @@
 #include "db.h"
 
 #include <chrono>
+#include <fcntl.h>
 #include <memory>
 #include <random>
 #include <stddef.h>
 #include <string>
+#include <unistd.h>
+#include <utility>
 #include <vector>
 #include <zstd.h>
 
 class PostingListBuilder;
 
+// {0,0} means unknown or so current that it should never match.
+// {-1,0} means it's not a directory.
+struct dir_time {
+       int64_t sec;
+       int32_t nsec;
+
+       bool operator<(const dir_time &other) const
+       {
+               if (sec != other.sec)
+                       return sec < other.sec;
+               return nsec < other.nsec;
+       }
+       bool operator>=(const dir_time &other) const
+       {
+               return !(other < *this);
+       }
+};
+constexpr dir_time unknown_dir_time{ 0, 0 };
+constexpr dir_time not_a_dir{ -1, 0 };
+
 // Abstract interface for objects that are fed filenames one at a time.
 // add_file() and flush_block() are pure virtual; DictionaryBuilder below is
 // one implementation.
 class DatabaseReceiver {
 public:
        virtual ~DatabaseReceiver() = default;
-       virtual void add_file(std::string filename) = 0;
+       virtual void add_file(std::string filename, dir_time dt) = 0;  // dt: the dir_time passed along with this filename
        virtual void flush_block() = 0;
        virtual void finish() { flush_block(); }  // default implementation only flushes the current block
+
+       // EncodingCorpus only. The base version returns -1 converted to
+       // size_t, i.e. the largest size_t value.
+       virtual size_t num_files_seen() const { return -1; }
 };
 
 class DictionaryBuilder : public DatabaseReceiver {
 public:
        DictionaryBuilder(size_t blocks_to_keep, size_t block_size)
                : blocks_to_keep(blocks_to_keep), block_size(block_size) {}
-       void add_file(std::string filename) override;
+       void add_file(std::string filename, dir_time dt) override;
        void flush_block() override;
        std::string train(size_t buf_size);
 
@@ -43,46 +69,26 @@ private:
        std::vector<size_t> lengths;
 };
 
-class Corpus : public DatabaseReceiver {  // class definition removed from this header by this change
-public:
-       Corpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict);
-       ~Corpus();
-
-       void add_file(std::string filename) override;
-       void flush_block() override;
-       void finish() override;
-
-       std::vector<uint64_t> filename_blocks;
-       size_t num_files = 0, num_files_in_block = 0, num_blocks = 0;
-       bool seen_trigram(uint32_t trgm)  // true when invindex holds a non-null entry for trgm
-       {
-               return invindex[trgm] != nullptr;
-       }
-       PostingListBuilder &get_pl_builder(uint32_t trgm);
-       size_t num_trigrams() const;
-
-private:
-       std::unique_ptr<PostingListBuilder *[]> invindex;  // array of builder pointers, indexed by trigram value (see seen_trigram)
-       FILE *outfp;
-       std::string current_block;
-       std::string tempbuf;
-       const size_t block_size;
-       ZSTD_CDict *cdict;
-};
+class EncodingCorpus;
 
 // Interface for building a database. Only declarations are visible in this
 // header, so the notes below describe the declarations, not the behavior.
 class DatabaseBuilder {
 public:
-       DatabaseBuilder(const char *outfile, int block_size, std::string dictionary);
-       Corpus *start_corpus();
+       DatabaseBuilder(const char *outfile, gid_t owner, int block_size, std::string dictionary, bool check_visibility);
+       DatabaseReceiver *start_corpus(bool store_dir_times);  // returns the abstract receiver type instead of Corpus *
+       void set_next_dictionary(std::string next_dictionary);  // presumably fills the next_dictionary member -- confirm in the implementation
+       void set_conf_block(std::string conf_block);  // presumably fills the conf_block member -- confirm in the implementation
        void finish_corpus();
 
 private:
        FILE *outfp;
+       std::string outfile;
+       std::string temp_filename;  // NOTE(review): name suggests output goes to a temporary file first -- confirm
        Header hdr;
        const int block_size;
        std::chrono::steady_clock::time_point corpus_start;
-       Corpus *corpus = nullptr;
+       EncodingCorpus *corpus = nullptr;  // EncodingCorpus is only forward-declared in this header
        ZSTD_CDict *cdict = nullptr;
+       std::string next_dictionary, conf_block;
 };
 
 #endif  // !defined(_DATABASE_BUILDER_H)