git.sesse.net Git - plocate/commitdiff
Make DatabaseBuilder write the file atomically.
author: Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sat, 21 Nov 2020 14:34:59 +0000 (15:34 +0100)
committer: Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sat, 21 Nov 2020 14:34:59 +0000 (15:34 +0100)
By opening with O_TMPFILE, we guarantee we'll never be leaving
an unfinished file visible on the filesystem. The move across the
old one isn't atomic, but the window of failure is very small now.

database-builder.cpp
database-builder.h

index b95d3bbcdb654c9248f118fa886da6a1a85ef2cc..a35be2a55702cad0e8b5bdf6b1a29767da3ef8a3 100644 (file)
@@ -11,6 +11,7 @@
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <unistd.h>
 #include <zdict.h>
 #include <zstd.h>
 
@@ -335,10 +336,19 @@ unique_ptr<Trigram[]> create_hashtable(Corpus &corpus, const vector<uint32_t> &a
 }
 
 DatabaseBuilder::DatabaseBuilder(const char *outfile, int block_size, string dictionary)
-       : block_size(block_size)
+       : outfile(outfile), block_size(block_size)
 {
        umask(0027);
-       outfp = fopen(outfile, "wb");
+
+       string path = outfile;
+       path.resize(path.find_last_of('/') + 1);
+       int fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640);
+       if (fd == -1) {
+               perror(path.c_str());
+               exit(1);
+       }
+
+       outfp = fdopen(fd, "wb");
        if (outfp == nullptr) {
                perror(outfile);
                exit(1);
@@ -462,6 +472,17 @@ void DatabaseBuilder::finish_corpus()
        hdr.version = 1;
        fseek(outfp, 0, SEEK_SET);
        fwrite(&hdr, sizeof(hdr), 1, outfp);
+
+       // Give the file a proper name, making it visible in the file system.
+       // TODO: It would be nice to be able to do this atomically, like with rename.
+       unlink(outfile.c_str());
+       char procpath[256];
+       snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp));
+       if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) {
+               perror("linkat");
+               exit(1);
+       }
+
        fclose(outfp);
 
        size_t total_bytes = (bytes_for_hashtable + bytes_for_posting_lists + bytes_for_filename_index + bytes_for_filenames);
index 95c94a0f8f29b42bfde691b11dbccb129de68280..e799105563270690099bffcbc9f9b196d49beb6f 100644 (file)
@@ -78,6 +78,7 @@ public:
 
 private:
        FILE *outfp;
+       std::string outfile;
        Header hdr;
        const int block_size;
        std::chrono::steady_clock::time_point corpus_start;