From cb87fdb5ede1d5a9fa0a5a309d9ccf98f42fe0b7 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 27 Jul 2021 16:19:01 +0200 Subject: [PATCH] Support the -e (--existing) option from mlocate. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This can be useful if you have recently deleted some files and do not want to do a database rebuild. Note that symlinks are never followed when checking for existence, unlike in mlocate, where --follow is the default. We support neither the --follow nor the --nofollow option, since it's not clear what it would be useful for, and the mlocate source code says it “looks like a historical accident”. --- io_uring_engine.cpp | 2 +- options.h | 1 + plocate.1 | 9 ++++++++ plocate.cpp | 51 ++++++++++++++++++++++++++++++++------------- 4 files changed, 48 insertions(+), 15 deletions(-) diff --git a/io_uring_engine.cpp b/io_uring_engine.cpp index 116cf85..eab5e8d 100644 --- a/io_uring_engine.cpp +++ b/io_uring_engine.cpp @@ -123,7 +123,7 @@ void IOUringEngine::submit_stat_internal(io_uring_sqe *sqe, char *path, std::fun pending->stat.pathname = path; pending->stat.buf = new struct statx; - io_uring_prep_statx(sqe, /*fd=*/-1, pending->stat.pathname, AT_STATX_SYNC_AS_STAT, STATX_MODE, pending->stat.buf); + io_uring_prep_statx(sqe, /*fd=*/-1, pending->stat.pathname, AT_STATX_SYNC_AS_STAT | AT_SYMLINK_NOFOLLOW, STATX_MODE, pending->stat.buf); io_uring_sqe_set_data(sqe, pending); ++pending_reads; } diff --git a/options.h b/options.h index a372886..eba4900 100644 --- a/options.h +++ b/options.h @@ -10,6 +10,7 @@ extern bool use_debug; extern bool flush_cache; extern bool patterns_are_regex; extern bool use_extended_regex; +extern bool check_existence; extern int64_t limit_matches; extern int64_t limit_left; // Not strictly an option. extern bool stdout_is_tty; // Same. diff --git a/plocate.1 b/plocate.1 index 090fdb9..cc0e843 100644 --- a/plocate.1 +++ b/plocate.1 @@ -78,6 +78,15 @@ It is also possible to give multiple databases in one argument, separated by .BR : . 
(Any character, including : and \\, can be escaped by prepending a \\.) +.TP +\fB\-e\fR, \fB\-\-existing\fR +Print only entries that refer to files existing at the time +.B locate +is run. Note that unlike +.BR mlocate (1), +symlinks are not followed by default (and indeed, there is no option +to change this). + .TP \fB\-i\fR, \fB\-\-ignore\-case\fR Do a case-insensitive match as given by the current locale diff --git a/plocate.cpp b/plocate.cpp index 72dbe28..7cc4f5a 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -53,6 +53,7 @@ bool flush_cache = false; bool patterns_are_regex = false; bool use_extended_regex = false; bool match_basename = false; +bool check_existence = false; int64_t limit_matches = numeric_limits::max(); int64_t limit_left = numeric_limits::max(); bool stdout_is_tty = false; @@ -152,8 +153,24 @@ size_t Corpus::get_num_filename_blocks() const return hdr.num_docids; } +template +void stat_if_needed(const char *filename, bool access_ok, IOUringEngine *engine, T cb) +{ + if (!access_ok || !check_existence) { + // Doesn't have access or doesn't care about existence, so no need to stat. + cb(access_ok); + } else if (engine == nullptr || !engine->get_supports_stat()) { + // Do a synchronous stat. 
+ struct stat buf; + bool ok = lstat(filename, &buf) == 0; + cb(ok); + } else { + engine->submit_stat(filename, cb); + } +} + void scan_file_block(const vector &needles, string_view compressed, - AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, + IOUringEngine *engine, AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer, atomic *matched) { unsigned long long uncompressed_len = ZSTD_getFrameContentSize(compressed.data(), compressed.size()); @@ -182,14 +199,16 @@ void scan_file_block(const vector &needles, string_view compressed, block[block.size() - 1] = '\0'; auto test_candidate = [&](const char *filename, uint64_t local_seq, uint64_t next_seq) { - access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { - if (ok) { - ++*matched; - serializer->print(local_seq, next_seq - local_seq, filename); - } else { - serializer->print(local_seq, next_seq - local_seq, ""); - } - free(filename); + access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, engine, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) { + stat_if_needed(filename, ok, engine, [matched, serializer, local_seq, next_seq, filename](bool ok) { + if (ok) { + ++*matched; + serializer->print(local_seq, next_seq - local_seq, filename); + } else { + serializer->print(local_seq, next_seq - local_seq, ""); + } + free(filename); + }); }); }; @@ -240,8 +259,8 @@ size_t scan_docids(const vector &needles, const vector &docids atomic matched{ 0 }; for (size_t i = 0; i < docids.size(); ++i) { uint32_t docid = docids[i]; - corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, &docids_in_order](string_view compressed) { - scan_file_block(needles, compressed, &access_rx_cache, i, &docids_in_order, &matched); + corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, engine, 
&docids_in_order](string_view compressed) { + scan_file_block(needles, compressed, engine, &access_rx_cache, i, &docids_in_order, &matched); }); } engine->finish(); @@ -328,7 +347,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co dprintf("Using %u worker threads for linear scan.\n", num_threads); unique_ptr threads(new WorkerThread[num_threads]); for (unsigned i = 0; i < num_threads; ++i) { - threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, &matched, i] { + threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, engine{ corpus.engine }, &matched, i] { // regcomp() takes a lock on the regex, so each thread will need its own. const vector *use_needles = &needles; vector recompiled_needles; @@ -359,7 +378,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co for (uint32_t docid = io_docid; docid < last_docid; ++docid) { size_t relative_offset = offsets[docid] - offsets[io_docid]; size_t len = offsets[docid + 1] - offsets[docid]; - scan_file_block(*use_needles, { &compressed[relative_offset], len }, &access_rx_cache, docid, &receiver, &matched); + scan_file_block(*use_needles, { &compressed[relative_offset], len }, engine, &access_rx_cache, docid, &receiver, &matched); } } }); @@ -816,6 +835,7 @@ int main(int argc, char **argv) { "count", no_argument, 0, 'c' }, { "basename", no_argument, 0, 'b' }, { "database", required_argument, 0, 'd' }, + { "existing", no_argument, 0, 'e' }, { "ignore-case", no_argument, 0, 'i' }, { "limit", required_argument, 0, 'l' }, { "null", no_argument, 0, '0' }, @@ -832,7 +852,7 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); for (;;) { int option_index = 0; - int c = getopt_long(argc, argv, "bcd:hil:n:0rwVD", long_options, &option_index); + int c = getopt_long(argc, argv, "bcd:ehil:n:0rwVD", long_options, &option_index); if (c == -1) { 
break; } @@ -846,6 +866,9 @@ int main(int argc, char **argv) case 'd': parse_dbpaths(optarg, &dbpaths); break; + case 'e': + check_existence = true; + break; case 'h': usage(); exit(0); -- 2.39.2