From 2983b424187a853bcde96a8feff6ef0ae48db7d5 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 21 Nov 2020 15:34:59 +0100 Subject: [PATCH] Make DatabaseBuilder write the file atomically. By opening with O_TMPFILE, we guarantee we'll never be leaving an unfinished file visible on the filesystem. The move across the old one isn't atomic, but the window of failure is very small now. --- database-builder.cpp | 25 +++++++++++++++++++++++-- database-builder.h | 1 + 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/database-builder.cpp b/database-builder.cpp index b95d3bb..a35be2a 100644 --- a/database-builder.cpp +++ b/database-builder.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -335,10 +336,19 @@ unique_ptr create_hashtable(Corpus &corpus, const vector &a } DatabaseBuilder::DatabaseBuilder(const char *outfile, int block_size, string dictionary) - : block_size(block_size) + : outfile(outfile), block_size(block_size) { umask(0027); - outfp = fopen(outfile, "wb"); + + string path = outfile; + path.resize(path.find_last_of('/') + 1); + int fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640); + if (fd == -1) { + perror(path.c_str()); + exit(1); + } + + outfp = fdopen(fd, "wb"); if (outfp == nullptr) { perror(outfile); exit(1); @@ -462,6 +472,17 @@ void DatabaseBuilder::finish_corpus() hdr.version = 1; fseek(outfp, 0, SEEK_SET); fwrite(&hdr, sizeof(hdr), 1, outfp); + + // Give the file a proper name, making it visible in the file system. + // TODO: It would be nice to be able to do this atomically, like with rename. + unlink(outfile.c_str()); + char procpath[256]; + snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp)); + if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) { + perror("linkat"); + exit(1); + } + fclose(outfp); size_t total_bytes = (bytes_for_hashtable + bytes_for_posting_lists + bytes_for_filename_index + bytes_for_filenames); diff --git a/database-builder.h b/database-builder.h index 95c94a0..e799105 100644 --- a/database-builder.h +++ b/database-builder.h @@ -78,6 +78,7 @@ public: private: FILE *outfp; + std::string outfile; Header hdr; const int block_size; std::chrono::steady_clock::time_point corpus_start; -- 2.39.2