git.sesse.net Git - plocate/blobdiff - database-builder.cpp
Release plocate 1.1.22.
[plocate] / database-builder.cpp
index da52cdb57764b6d60247c74e4318a84872503c49..f9dfb715391b3be7c4c521710d48f86b3b10795b 100644 (file)
@@ -36,7 +36,8 @@ public:
        void finish();
 
        vector<unsigned char> encoded;
-       size_t get_num_docids() const {
+       size_t get_num_docids() const
+       {
                // Updated only when we flush, so check that we're finished.
                assert(pending_deltas.empty());
                return num_docids;
@@ -208,6 +209,7 @@ private:
 
        std::unique_ptr<PostingListBuilder *[]> invindex;
        FILE *outfp;
+       off_t outfp_pos;  // Cheaper than calling ftell(outfp) all the time.
        std::string current_block;
        std::string tempbuf;
        const size_t block_size;
@@ -219,9 +221,8 @@ private:
        std::string dir_times_compressed;
 };
 
-
 EncodingCorpus::EncodingCorpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times)
-       : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict)
+       : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), outfp_pos(ftell(outfp)), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict)
 {
        fill(invindex.get(), invindex.get() + NUM_TRIGRAMS, nullptr);
        if (store_dir_times) {
@@ -318,7 +319,7 @@ void EncodingCorpus::flush_block()
                        ptr += 3;
                        continue;
                }
-               for ( ;; ) {
+               for (;;) {
                        // NOTE: Will read one byte past the end of the trigram, but it's OK,
                        // since we always call it from contexts where there's a terminating zero byte.
                        uint32_t trgm;
@@ -336,12 +337,13 @@ void EncodingCorpus::flush_block()
        }
 
        // Compress and add the filename block.
-       filename_blocks.push_back(ftell(outfp));
+       filename_blocks.push_back(outfp_pos);
        string compressed = zstd_compress(current_block, cdict, &tempbuf);
        if (fwrite(compressed.data(), compressed.size(), 1, outfp) != 1) {
                perror("fwrite()");
                exit(1);
        }
+       outfp_pos += compressed.size();
 
        current_block.clear();
        num_files_in_block = 0;
@@ -485,24 +487,26 @@ DatabaseBuilder::DatabaseBuilder(const char *outfile, gid_t owner, int block_siz
        if (path.empty()) {
                path = ".";
        }
+       int fd = -1;
 #ifdef O_TMPFILE
-       int fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640);
-       if (fd == -1) {
+       fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640);
+       if (fd == -1 && errno != EOPNOTSUPP && errno != EISDIR) {
                perror(path.c_str());
                exit(1);
        }
-#else
-       temp_filename = string(outfile) + ".XXXXXX";
-       int fd = mkstemp(&temp_filename[0]);
+#endif
        if (fd == -1) {
-               perror(temp_filename.c_str());
-               exit(1);
-       }
-       if (fchmod(fd, 0640) == -1) {
-               perror("fchmod");
-               exit(1);
+               temp_filename = string(outfile) + ".XXXXXX";
+               fd = mkstemp(&temp_filename[0]);
+               if (fd == -1) {
+                       perror(temp_filename.c_str());
+                       exit(1);
+               }
+               if (fchmod(fd, 0640) == -1) {
+                       perror("fchmod");
+                       exit(1);
+               }
        }
-#endif
 
        if (owner != (gid_t)-1) {
                if (fchown(fd, (uid_t)-1, owner) == -1) {
@@ -679,22 +683,28 @@ void DatabaseBuilder::finish_corpus()
        fseek(outfp, 0, SEEK_SET);
        fwrite(&hdr, sizeof(hdr), 1, outfp);
 
+       // This is needed on systems that simulate linkat() by copying
+       // the contents of the file instead of linking.
+       fflush(outfp);
+
+       if (!temp_filename.empty()) {
+               if (rename(temp_filename.c_str(), outfile.c_str()) == -1) {
+                       perror("rename");
+                       exit(1);
+               }
+       } else {
 #ifdef O_TMPFILE
-       // Give the file a proper name, making it visible in the file system.
-       // TODO: It would be nice to be able to do this atomically, like with rename.
-       unlink(outfile.c_str());
-       char procpath[256];
-       snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp));
-       if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) {
-               perror("linkat");
-               exit(1);
-       }
-#else
-       if (rename(temp_filename.c_str(), outfile.c_str()) == -1) {
-               perror("rename");
-               exit(1);
-       }
+               // Give the file a proper name, making it visible in the file system.
+               // TODO: It would be nice to be able to do this atomically, like with rename.
+               unlink(outfile.c_str());
+               char procpath[256];
+               snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp));
+               if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) {
+                       perror("linkat");
+                       exit(1);
+               }
 #endif
+       }
 
        fclose(outfp);