X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=plocate.cpp;h=44a1375ad53097778b3aa4a104155ed6bb529876;hb=519e26977d6269d8c887c3bb67ccd67edf5acaa5;hp=9606d2f49db1892be4a145e752262bffaac77e0b;hpb=3f086da52c281f81cb2290dad17cfb0f1d659810;p=plocate diff --git a/plocate.cpp b/plocate.cpp index 9606d2f..44a1375 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ bool flush_cache = false; bool patterns_are_regex = false; bool use_extended_regex = false; bool match_basename = false; +bool check_existence = false; int64_t limit_matches = numeric_limits::max(); int64_t limit_left = numeric_limits::max(); bool stdout_is_tty = false; @@ -152,8 +154,24 @@ size_t Corpus::get_num_filename_blocks() const return hdr.num_docids; } +template +void stat_if_needed(const char *filename, bool access_ok, IOUringEngine *engine, T cb) +{ + if (!access_ok || !check_existence) { + // Doesn't have access or doesn't care about existence, so no need to stat. + cb(access_ok); + } else if (engine == nullptr || !engine->get_supports_stat()) { + // Do a synchronous stat. + struct stat buf; + bool ok = lstat(filename, &buf) == 0; + cb(ok); + } else { + engine->submit_stat(filename, cb); + } +} + void scan_file_block(const vector &needles, string_view compressed, - AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, + IOUringEngine *engine, AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, atomic *matched) { unsigned long long uncompressed_len = ZSTD_getFrameContentSize(compressed.data(), compressed.size()); @@ -182,14 +200,16 @@ void scan_file_block(const vector &needles, string_view compressed, block[block.size() - 1] = '\0'; auto test_candidate = [&](const char *filename, uint64_t local_seq, uint64_t next_seq) { - access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { - if (ok) { - ++*matched; - serializer->print(local_seq, next_seq - local_seq, filename); - } else { - serializer->print(local_seq, next_seq - local_seq, ""); - } - free(filename); + access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, engine, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { + stat_if_needed(filename, ok, engine, [matched, serializer, local_seq, next_seq, filename](bool ok) { + if (ok) { + ++*matched; + serializer->print(local_seq, next_seq - local_seq, filename); + } else { + serializer->print(local_seq, next_seq - local_seq, ""); + } + free(filename); + }); }); }; @@ -240,8 +260,8 @@ size_t scan_docids(const vector &needles, const vector &docids atomic matched{ 0 }; for (size_t i = 0; i < docids.size(); ++i) { uint32_t docid = docids[i]; - corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, &docids_in_order](string_view compressed) { - scan_file_block(needles, compressed, &access_rx_cache, i, &docids_in_order, &matched); + corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, engine, &docids_in_order](string_view compressed) { + scan_file_block(needles, compressed, engine, &access_rx_cache, i, &docids_in_order, &matched); }); } engine->finish(); @@ -328,7 +348,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co dprintf("Using %u worker threads for linear scan.\n", num_threads); unique_ptr threads(new WorkerThread[num_threads]); for (unsigned i = 0; i < num_threads; ++i) { - threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, &matched, i] { + threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, engine{ corpus.engine }, &matched, i] { // regcomp() takes a lock on the regex, so each thread will need its own. const vector *use_needles = &needles; vector recompiled_needles; @@ -359,7 +379,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co for (uint32_t docid = io_docid; docid < last_docid; ++docid) { size_t relative_offset = offsets[docid] - offsets[io_docid]; size_t len = offsets[docid + 1] - offsets[docid]; - scan_file_block(*use_needles, { &compressed[relative_offset], len }, &access_rx_cache, docid, &receiver, &matched); + scan_file_block(*use_needles, { &compressed[relative_offset], len }, engine, &access_rx_cache, docid, &receiver, &matched); } } }); @@ -698,6 +718,7 @@ uint64_t do_search_file_in_child(const vector &needles, const std::strin perror("write"); _exit(EXIT_FAILURE); } + fflush(stdout); _exit(EXIT_SUCCESS); } case -1: @@ -815,6 +836,7 @@ int main(int argc, char **argv) { "count", no_argument, 0, 'c' }, { "basename", no_argument, 0, 'b' }, { "database", required_argument, 0, 'd' }, + { "existing", no_argument, 0, 'e' }, { "ignore-case", no_argument, 0, 'i' }, { "limit", required_argument, 0, 'l' }, { "null", no_argument, 0, '0' }, @@ -831,7 +853,7 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); for (;;) { int option_index = 0; - int c = getopt_long(argc, argv, "bcd:hil:n:0rwVD", long_options, &option_index); + int c = getopt_long(argc, argv, "bcd:ehil:n:0rwVD", long_options, &option_index); if (c == -1) { break; } @@ -845,6 +867,9 @@ int main(int argc, char **argv) case 'd': parse_dbpaths(optarg, &dbpaths); break; + case 'e': + check_existence = true; + break; case 'h': usage(); exit(0);