X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=database-builder.cpp;h=419e012ca94c2625471680de17b67f07716ecec8;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=da52cdb57764b6d60247c74e4318a84872503c49;hpb=0553e4c38aac88b13638ddacd46c7e7a19fcba3e;p=plocate diff --git a/database-builder.cpp b/database-builder.cpp index da52cdb..419e012 100644 --- a/database-builder.cpp +++ b/database-builder.cpp @@ -36,7 +36,8 @@ public: void finish(); vector encoded; - size_t get_num_docids() const { + size_t get_num_docids() const + { // Updated only when we flush, so check that we're finished. assert(pending_deltas.empty()); return num_docids; @@ -208,6 +209,7 @@ private: std::unique_ptr invindex; FILE *outfp; + off_t outfp_pos; // Cheaper than calling ftell(outfp) all the time. std::string current_block; std::string tempbuf; const size_t block_size; @@ -219,9 +221,8 @@ private: std::string dir_times_compressed; }; - EncodingCorpus::EncodingCorpus(FILE *outfp, size_t block_size, ZSTD_CDict *cdict, bool store_dir_times) - : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict) + : invindex(new PostingListBuilder *[NUM_TRIGRAMS]), outfp(outfp), outfp_pos(ftell(outfp)), block_size(block_size), store_dir_times(store_dir_times), cdict(cdict) { fill(invindex.get(), invindex.get() + NUM_TRIGRAMS, nullptr); if (store_dir_times) { @@ -318,7 +319,7 @@ void EncodingCorpus::flush_block() ptr += 3; continue; } - for ( ;; ) { + for (;;) { // NOTE: Will read one byte past the end of the trigram, but it's OK, // since we always call it from contexts where there's a terminating zero byte. uint32_t trgm; @@ -336,12 +337,13 @@ void EncodingCorpus::flush_block() } // Compress and add the filename block. - filename_blocks.push_back(ftell(outfp)); + filename_blocks.push_back(outfp_pos); string compressed = zstd_compress(current_block, cdict, &tempbuf); if (fwrite(compressed.data(), compressed.size(), 1, outfp) != 1) { perror("fwrite()"); exit(1); } + outfp_pos += compressed.size(); current_block.clear(); num_files_in_block = 0; @@ -485,24 +487,26 @@ DatabaseBuilder::DatabaseBuilder(const char *outfile, gid_t owner, int block_siz if (path.empty()) { path = "."; } + int fd = -1; #ifdef O_TMPFILE - int fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640); - if (fd == -1) { + fd = open(path.c_str(), O_WRONLY | O_TMPFILE, 0640); + if (fd == -1 && errno != EOPNOTSUPP && errno != EISDIR) { perror(path.c_str()); exit(1); } -#else - temp_filename = string(outfile) + ".XXXXXX"; - int fd = mkstemp(&temp_filename[0]); +#endif if (fd == -1) { - perror(temp_filename.c_str()); - exit(1); - } - if (fchmod(fd, 0640) == -1) { - perror("fchmod"); - exit(1); + temp_filename = string(outfile) + ".XXXXXX"; + fd = mkstemp(&temp_filename[0]); + if (fd == -1) { + perror(temp_filename.c_str()); + exit(1); + } + if (fchmod(fd, 0640) == -1) { + perror("fchmod"); + exit(1); + } } -#endif if (owner != (gid_t)-1) { if (fchown(fd, (uid_t)-1, owner) == -1) { @@ -679,22 +683,24 @@ void DatabaseBuilder::finish_corpus() fseek(outfp, 0, SEEK_SET); fwrite(&hdr, sizeof(hdr), 1, outfp); + if (!temp_filename.empty()) { + if (rename(temp_filename.c_str(), outfile.c_str()) == -1) { + perror("rename"); + exit(1); + } + } else { #ifdef O_TMPFILE - // Give the file a proper name, making it visible in the file system. - // TODO: It would be nice to be able to do this atomically, like with rename. - unlink(outfile.c_str()); - char procpath[256]; - snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp)); - if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) { - perror("linkat"); - exit(1); - } -#else - if (rename(temp_filename.c_str(), outfile.c_str()) == -1) { - perror("rename"); - exit(1); - } + // Give the file a proper name, making it visible in the file system. + // TODO: It would be nice to be able to do this atomically, like with rename. + unlink(outfile.c_str()); + char procpath[256]; + snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fileno(outfp)); + if (linkat(AT_FDCWD, procpath, AT_FDCWD, outfile.c_str(), AT_SYMLINK_FOLLOW) == -1) { + perror("linkat"); + exit(1); + } #endif + } fclose(outfp);