X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=plocate.cpp;h=a1cd97a4a4645c08880b014d4799f9d4c33182f6;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=9606d2f49db1892be4a145e752262bffaac77e0b;hpb=3f086da52c281f81cb2290dad17cfb0f1d659810;p=plocate diff --git a/plocate.cpp b/plocate.cpp index 9606d2f..a1cd97a 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -53,9 +54,11 @@ bool flush_cache = false; bool patterns_are_regex = false; bool use_extended_regex = false; bool match_basename = false; +bool check_existence = false; int64_t limit_matches = numeric_limits::max(); int64_t limit_left = numeric_limits::max(); bool stdout_is_tty = false; +bool literal_printing = false; static bool in_forked_child = false; steady_clock::time_point start; @@ -63,7 +66,7 @@ ZSTD_DDict *ddict = nullptr; class Corpus { public: - Corpus(int fd, IOUringEngine *engine); + Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine); ~Corpus(); void find_trigram(uint32_t trgm, function cb); void get_compressed_filename_block(uint32_t docid, function cb) const; @@ -81,7 +84,7 @@ public: Header hdr; }; -Corpus::Corpus(int fd, IOUringEngine *engine) +Corpus::Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine) : fd(fd), engine(engine) { if (flush_cache) { @@ -95,11 +98,11 @@ Corpus::Corpus(int fd, IOUringEngine *engine) complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0); if (memcmp(hdr.magic, "\0plocate", 8) != 0) { - fprintf(stderr, "plocate.db is corrupt or an old version; please rebuild it.\n"); + fprintf(stderr, "%s: database is corrupt or not a plocate database; please rebuild it.\n", filename_for_errors); exit(1); } if (hdr.version != 0 && hdr.version != 1) { - fprintf(stderr, "plocate.db has version %u, expected 0 or 1; please rebuild it.\n", hdr.version); + fprintf(stderr, "%s: has version %u, expected 0 or 1; please rebuild it.\n", filename_for_errors, hdr.version); exit(1); } if (hdr.version == 0) { @@ -152,8 +155,24 @@ size_t Corpus::get_num_filename_blocks() const return hdr.num_docids; } +template +void stat_if_needed(const char *filename, bool access_ok, IOUringEngine *engine, T cb) +{ + if (!access_ok || !check_existence) { + // Doesn't have access or doesn't care about existence, so no need to stat. + cb(access_ok); + } else if (engine == nullptr || !engine->get_supports_stat()) { + // Do a synchronous stat. + struct stat buf; + bool ok = lstat(filename, &buf) == 0; + cb(ok); + } else { + engine->submit_stat(filename, cb); + } +} + void scan_file_block(const vector &needles, string_view compressed, - AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, + IOUringEngine *engine, AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, atomic *matched) { unsigned long long uncompressed_len = ZSTD_getFrameContentSize(compressed.data(), compressed.size()); @@ -182,14 +201,16 @@ void scan_file_block(const vector &needles, string_view compressed, block[block.size() - 1] = '\0'; auto test_candidate = [&](const char *filename, uint64_t local_seq, uint64_t next_seq) { - access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { - if (ok) { - ++*matched; - serializer->print(local_seq, next_seq - local_seq, filename); - } else { - serializer->print(local_seq, next_seq - local_seq, ""); - } - free(filename); + access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, engine, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { + stat_if_needed(filename, ok, engine, [matched, serializer, local_seq, next_seq, filename](bool ok) { + if (ok) { + ++*matched; + serializer->print(local_seq, next_seq - local_seq, filename); + } else { + serializer->print(local_seq, next_seq - local_seq, ""); + } + free(filename); + }); }); }; @@ -240,8 +261,8 @@ size_t scan_docids(const vector &needles, const vector &docids atomic matched{ 0 }; for (size_t i = 0; i < docids.size(); ++i) { uint32_t docid = docids[i]; - corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, &docids_in_order](string_view compressed) { - scan_file_block(needles, compressed, &access_rx_cache, i, &docids_in_order, &matched); + corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, engine, &docids_in_order](string_view compressed) { + scan_file_block(needles, compressed, engine, &access_rx_cache, i, &docids_in_order, &matched); }); } engine->finish(); @@ -359,7 +380,8 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co for (uint32_t docid = io_docid; docid < last_docid; ++docid) { size_t relative_offset = offsets[docid] - offsets[io_docid]; size_t len = offsets[docid + 1] - offsets[docid]; - scan_file_block(*use_needles, { &compressed[relative_offset], len }, &access_rx_cache, docid, &receiver, &matched); + // IOUringEngine isn't thread-safe, so we do any needed stat()s synchronously (nullptr engine). + scan_file_block(*use_needles, { &compressed[relative_offset], len }, /*engine=*/nullptr, &access_rx_cache, docid, &receiver, &matched); } } }); @@ -467,7 +489,7 @@ uint64_t do_search_file(const vector &needles, const std::string &filena } IOUringEngine engine(/*slop_bytes=*/16); // 16 slop bytes as described in turbopfor.h. - Corpus corpus(fd, &engine); + Corpus corpus(fd, filename.c_str(), &engine); dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration(steady_clock::now() - start).count()); vector trigram_groups; @@ -698,6 +720,7 @@ uint64_t do_search_file_in_child(const vector &needles, const std::strin perror("write"); _exit(EXIT_FAILURE); } + fflush(stdout); _exit(EXIT_SUCCESS); } case -1: @@ -787,6 +810,7 @@ void usage() " -i, --ignore-case search case-insensitively\n" " -l, --limit LIMIT stop after LIMIT matches\n" " -0, --null delimit matches by NUL instead of newline\n" + " -N, --literal do not quote filenames, even if printing to a tty\n" " -r, --regexp interpret patterns as basic regexps (slow)\n" " --regex interpret patterns as extended regexps (slow)\n" " -w, --wholename search the entire path name (default; see -b)\n" @@ -813,10 +837,13 @@ int main(int argc, char **argv) static const struct option long_options[] = { { "help", no_argument, 0, 'h' }, { "count", no_argument, 0, 'c' }, + { "all", no_argument, 0, 'A' }, { "basename", no_argument, 0, 'b' }, { "database", required_argument, 0, 'd' }, + { "existing", no_argument, 0, 'e' }, { "ignore-case", no_argument, 0, 'i' }, { "limit", required_argument, 0, 'l' }, + { "literal", no_argument, 0, 'N' }, { "null", no_argument, 0, '0' }, { "version", no_argument, 0, 'V' }, { "regexp", no_argument, 0, 'r' }, @@ -831,11 +858,14 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); for (;;) { int option_index = 0; - int c = getopt_long(argc, argv, "bcd:hil:n:0rwVD", long_options, &option_index); + int c = getopt_long(argc, argv, "Abcd:ehil:n:N0rwVD", long_options, &option_index); if (c == -1) { break; } switch (c) { + case 'A': + // Ignored. + break; case 'b': match_basename = true; break; @@ -845,6 +875,9 @@ int main(int argc, char **argv) case 'd': parse_dbpaths(optarg, &dbpaths); break; + case 'e': + check_existence = true; + break; case 'h': usage(); exit(0); @@ -859,6 +892,9 @@ int main(int argc, char **argv) exit(1); } break; + case 'N': + literal_printing = true; + break; case '0': print_nul = true; break;