X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=plocate.cpp;h=519024e07c5b31d04f96db1cc085e8313bb4c39b;hb=HEAD;hp=1c0b3d0463f09ea04a5f9a39156d91a1e089c7fe;hpb=67c741868bac4d4b845426d8b7e6126f4ec41232;p=plocate diff --git a/plocate.cpp b/plocate.cpp index 1c0b3d0..519024e 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -55,6 +55,7 @@ bool patterns_are_regex = false; bool use_extended_regex = false; bool match_basename = false; bool check_existence = false; +bool ignore_visibility = false; int64_t limit_matches = numeric_limits::max(); int64_t limit_left = numeric_limits::max(); bool stdout_is_tty = false; @@ -66,7 +67,7 @@ ZSTD_DDict *ddict = nullptr; class Corpus { public: - Corpus(int fd, IOUringEngine *engine); + Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine); ~Corpus(); void find_trigram(uint32_t trgm, function cb); void get_compressed_filename_block(uint32_t docid, function cb) const; @@ -84,7 +85,7 @@ public: Header hdr; }; -Corpus::Corpus(int fd, IOUringEngine *engine) +Corpus::Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine) : fd(fd), engine(engine) { if (flush_cache) { @@ -98,11 +99,11 @@ Corpus::Corpus(int fd, IOUringEngine *engine) complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0); if (memcmp(hdr.magic, "\0plocate", 8) != 0) { - fprintf(stderr, "plocate.db is corrupt or an old version; please rebuild it.\n"); + fprintf(stderr, "%s: database is corrupt or not a plocate database; please rebuild it.\n", filename_for_errors); exit(1); } if (hdr.version != 0 && hdr.version != 1) { - fprintf(stderr, "plocate.db has version %u, expected 0 or 1; please rebuild it.\n", hdr.version); + fprintf(stderr, "%s: has version %u, expected 0 or 1; please rebuild it.\n", filename_for_errors, hdr.version); exit(1); } if (hdr.version == 0) { @@ -114,6 +115,9 @@ Corpus::Corpus(int fd, IOUringEngine *engine) // This too. (We ignore the other max_version 2 fields.) hdr.check_visibility = true; } + if (ignore_visibility) { + hdr.check_visibility = false; + } } Corpus::~Corpus() @@ -367,7 +371,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co string compressed; { - unique_lock lock(mu); + unique_lock lock(mu); queue_added.wait(lock, [&work_queue, &done] { return !work_queue.empty() || done; }); if (done && work_queue.empty()) { return; @@ -397,7 +401,7 @@ uint64_t scan_all_docids(const vector &needles, int fd, const Corpus &co complete_pread(fd, &compressed[0], io_len, offsets[io_docid]); { - unique_lock lock(mu); + unique_lock lock(mu); queue_removed.wait(lock, [&work_queue] { return work_queue.size() < 256; }); // Allow ~2MB of data queued up. work_queue.emplace_back(io_docid, last_docid, move(compressed)); queue_added.notify_one(); // Avoid the thundering herd. @@ -489,7 +493,7 @@ uint64_t do_search_file(const vector &needles, const std::string &filena } IOUringEngine engine(/*slop_bytes=*/16); // 16 slop bytes as described in turbopfor.h. - Corpus corpus(fd, &engine); + Corpus corpus(fd, filename.c_str(), &engine); dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration(steady_clock::now() - start).count()); vector trigram_groups; @@ -571,7 +575,7 @@ uint64_t do_search_file(const vector &needles, const std::string &filena if (only_count) { printf("0\n"); } - exit(0); + exit(1); } } } @@ -834,9 +838,11 @@ int main(int argc, char **argv) constexpr int EXTENDED_REGEX = 1000; constexpr int FLUSH_CACHE = 1001; + constexpr int IGNORE_VISIBILITY = 1002; static const struct option long_options[] = { { "help", no_argument, 0, 'h' }, { "count", no_argument, 0, 'c' }, + { "all", no_argument, 0, 'A' }, { "basename", no_argument, 0, 'b' }, { "database", required_argument, 0, 'd' }, { "existing", no_argument, 0, 'e' }, @@ -851,17 +857,23 @@ int main(int argc, char **argv) { "debug", no_argument, 0, 'D' }, // Not documented. // Enable to test cold-cache behavior (except for access()). Not documented. { "flush-cache", no_argument, 0, FLUSH_CACHE }, + // Mostly useful to dump out the entire database, even if the given directories + // are gone. Disables sgid due to security. Not documented. + { "ignore-visibility", no_argument, 0, IGNORE_VISIBILITY }, { 0, 0, 0, 0 } }; setlocale(LC_ALL, ""); for (;;) { int option_index = 0; - int c = getopt_long(argc, argv, "bcd:ehil:n:N0rwVD", long_options, &option_index); + int c = getopt_long(argc, argv, "Abcd:ehil:n:N0rwVD", long_options, &option_index); if (c == -1) { break; } switch (c) { + case 'A': + // Ignored. + break; case 'b': match_basename = true; break; @@ -913,17 +925,22 @@ int main(int argc, char **argv) case 'V': version(); break; + case IGNORE_VISIBILITY: + ignore_visibility = true; + break; default: exit(1); } } - if (use_debug || flush_cache) { + if (use_debug || flush_cache || ignore_visibility) { // Debug information would leak information about which files exist, // so drop setgid before we open the file; one would either need to run // as root, or use a locally-built file. Doing the same thing for // flush_cache is mostly paranoia, in an attempt to prevent random users // from making plocate slow for everyone else. + // --ignore-visibility is obvious; if we allowed to keep sgid with + // that flag on, it would subvert the entire security model. if (setgid(getgid()) != 0) { perror("setgid"); exit(EXIT_FAILURE); @@ -968,7 +985,7 @@ int main(int argc, char **argv) } if (needles.empty()) { fprintf(stderr, "plocate: no pattern to search for specified\n"); - exit(0); + exit(1); } if (dbpaths.empty()) { @@ -996,4 +1013,6 @@ int main(int argc, char **argv) if (only_count) { printf("%" PRId64 "\n", matched); } + + return matched == 0; }