git.sesse.net Git - plocate/blobdiff - database-builder.cpp
Release plocate 1.1.22.
[plocate] / database-builder.cpp
index 75b05f6a2cf1a898c014fd1227b4c6471c4f0148..f9dfb715391b3be7c4c521710d48f86b3b10795b 100644 (file)
@@ -36,7 +36,8 @@ public:
        void finish();
 
        vector<unsigned char> encoded;
-       size_t get_num_docids() const {
+       size_t get_num_docids() const
+       {
                // Updated only when we flush, so check that we're finished.
                assert(pending_deltas.empty());
                return num_docids;
@@ -208,6 +209,7 @@ private:
 
        std::unique_ptr<PostingListBuilder *[]> invindex;
        FILE *outfp;
+       off_t outfp_pos;  // Cheaper than calling ftell(outfp) all the time.
        std::string current_block;
        std::string tempbuf;
        const size_t block_size;
@@ -219,9 +221,8 @@ private:
        std::string dir_times_compressed;
 };
 
-
 EncodingCorpus::EncodingCorpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times)
-       : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict)
+       : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), outfp_pos(ftell(outfp)), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict)
 {
        fill(invindex.get(), invindex.get() + NUM_TRIGRAMS, nullptr);
        if (store_dir_times) {
@@ -318,7 +319,7 @@ void EncodingCorpus::flush_block()
                        ptr += 3;
                        continue;
                }
-               for ( ;; ) {
+               for (;;) {
                        // NOTE: Will read one byte past the end of the trigram, but it's OK,
                        // since we always call it from contexts where there's a terminating zero byte.
                        uint32_t trgm;
@@ -336,12 +337,13 @@ void EncodingCorpus::flush_block()
        }
 
        // Compress and add the filename block.
-       filename_blocks.push_back(ftell(outfp));
+       filename_blocks.push_back(outfp_pos);
        string compressed = zstd_compress(current_block, cdict, &tempbuf);
        if (fwrite(compressed.data(), compressed.size(), 1, outfp) != 1) {
                perror("fwrite()");
                exit(1);
        }
+       outfp_pos += compressed.size();
 
        current_block.clear();
        num_files_in_block = 0;
@@ -488,7 +490,7 @@ DatabaseBuilder::DatabaseBuilder(const char *outfile, gid_t owner, int block_siz
        int fd = -1;
 #ifdef O_TMPFILE
        fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640);
-       if (fd == -1 && errno != EOPNOTSUPP) {
+       if (fd == -1 && errno != EOPNOTSUPP && errno != EISDIR) {
                perror(path.c_str());
                exit(1);
        }
@@ -681,6 +683,10 @@ void DatabaseBuilder::finish_corpus()
        fseek(outfp, 0, SEEK_SET);
        fwrite(&hdr, sizeof(hdr), 1, outfp);
 
+       // This is needed on systems that simulate linkat() by copying
+       // the contents of the file instead of linking.
+       fflush(outfp);
+
        if (!temp_filename.empty()) {
                if (rename(temp_filename.c_str(), outfile.c_str()) == -1) {
                        perror("rename");