git.sesse.net Git - plocate/blobdiff - plocate.cpp
Release plocate 1.1.22.
[plocate] / plocate.cpp
index 44a1375ad53097778b3aa4a104155ed6bb529876..519024e07c5b31d04f96db1cc085e8313bb4c39b 100644 (file)
@@ -55,9 +55,11 @@ bool patterns_are_regex = false;
 bool use_extended_regex = false;
 bool match_basename = false;
 bool check_existence = false;
+bool ignore_visibility = false;
 int64_t limit_matches = numeric_limits<int64_t>::max();
 int64_t limit_left = numeric_limits<int64_t>::max();
 bool stdout_is_tty = false;
+bool literal_printing = false;
 static bool in_forked_child = false;
 
 steady_clock::time_point start;
@@ -65,7 +67,7 @@ ZSTD_DDict *ddict = nullptr;
 
 class Corpus {
 public:
-       Corpus(int fd, IOUringEngine *engine);
+       Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine);
        ~Corpus();
        void find_trigram(uint32_t trgm, function<void(const Trigram *trgmptr, size_t len)> cb);
        void get_compressed_filename_block(uint32_t docid, function<void(string_view)> cb) const;
@@ -83,7 +85,7 @@ public:
        Header hdr;
 };
 
-Corpus::Corpus(int fd, IOUringEngine *engine)
+Corpus::Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine)
        : fd(fd), engine(engine)
 {
        if (flush_cache) {
@@ -97,11 +99,11 @@ Corpus::Corpus(int fd, IOUringEngine *engine)
 
        complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0);
        if (memcmp(hdr.magic, "\0plocate", 8) != 0) {
-               fprintf(stderr, "plocate.db is corrupt or an old version; please rebuild it.\n");
+               fprintf(stderr, "%s: database is corrupt or not a plocate database; please rebuild it.\n", filename_for_errors);
                exit(1);
        }
        if (hdr.version != 0 && hdr.version != 1) {
-               fprintf(stderr, "plocate.db has version %u, expected 0 or 1; please rebuild it.\n", hdr.version);
+               fprintf(stderr, "%s: has version %u, expected 0 or 1; please rebuild it.\n", filename_for_errors, hdr.version);
                exit(1);
        }
        if (hdr.version == 0) {
@@ -113,6 +115,9 @@ Corpus::Corpus(int fd, IOUringEngine *engine)
                // This too. (We ignore the other max_version 2 fields.)
                hdr.check_visibility = true;
        }
+       if (ignore_visibility) {
+               hdr.check_visibility = false;
+       }
 }
 
 Corpus::~Corpus()
@@ -348,7 +353,7 @@ uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &co
        dprintf("Using %u worker threads for linear scan.\n", num_threads);
        unique_ptr<WorkerThread[]> threads(new WorkerThread[num_threads]);
        for (unsigned i = 0; i < num_threads; ++i) {
-               threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, engine{ corpus.engine }, &matched, i] {
+               threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, &matched, i] {
                        // regcomp() takes a lock on the regex, so each thread will need its own.
                        const vector<Needle> *use_needles = &needles;
                        vector<Needle> recompiled_needles;
@@ -366,7 +371,7 @@ uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &co
                                string compressed;
 
                                {
-                                       unique_lock<mutex> lock(mu);
+                                       unique_lock lock(mu);
                                        queue_added.wait(lock, [&work_queue, &done] { return !work_queue.empty() || done; });
                                        if (done && work_queue.empty()) {
                                                return;
@@ -379,7 +384,8 @@ uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &co
                                for (uint32_t docid = io_docid; docid < last_docid; ++docid) {
                                        size_t relative_offset = offsets[docid] - offsets[io_docid];
                                        size_t len = offsets[docid + 1] - offsets[docid];
-                                       scan_file_block(*use_needles, { &compressed[relative_offset], len }, engine, &access_rx_cache, docid, &receiver, &matched);
+                                       // IOUringEngine isn't thread-safe, so we do any needed stat()s synchronously (nullptr engine).
+                                       scan_file_block(*use_needles, { &compressed[relative_offset], len }, /*engine=*/nullptr, &access_rx_cache, docid, &receiver, &matched);
                                }
                        }
                });
@@ -395,7 +401,7 @@ uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &co
                complete_pread(fd, &compressed[0], io_len, offsets[io_docid]);
 
                {
-                       unique_lock<mutex> lock(mu);
+                       unique_lock lock(mu);
                        queue_removed.wait(lock, [&work_queue] { return work_queue.size() < 256; });  // Allow ~2MB of data queued up.
                        work_queue.emplace_back(io_docid, last_docid, move(compressed));
                        queue_added.notify_one();  // Avoid the thundering herd.
@@ -487,7 +493,7 @@ uint64_t do_search_file(const vector<Needle> &needles, const std::string &filena
        }
 
        IOUringEngine engine(/*slop_bytes=*/16);  // 16 slop bytes as described in turbopfor.h.
-       Corpus corpus(fd, &engine);
+       Corpus corpus(fd, filename.c_str(), &engine);
        dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
 
        vector<TrigramDisjunction> trigram_groups;
@@ -569,7 +575,7 @@ uint64_t do_search_file(const vector<Needle> &needles, const std::string &filena
                                                        if (only_count) {
                                                                printf("0\n");
                                                        }
-                                                       exit(0);
+                                                       exit(1);
                                                }
                                        }
                                }
@@ -808,6 +814,7 @@ void usage()
                "  -i, --ignore-case      search case-insensitively\n"
                "  -l, --limit LIMIT      stop after LIMIT matches\n"
                "  -0, --null             delimit matches by NUL instead of newline\n"
+               "  -N, --literal          do not quote filenames, even if printing to a tty\n"
                "  -r, --regexp           interpret patterns as basic regexps (slow)\n"
                "      --regex            interpret patterns as extended regexps (slow)\n"
                "  -w, --wholename        search the entire path name (default; see -b)\n"
@@ -831,14 +838,17 @@ int main(int argc, char **argv)
 
        constexpr int EXTENDED_REGEX = 1000;
        constexpr int FLUSH_CACHE = 1001;
+       constexpr int IGNORE_VISIBILITY = 1002;
        static const struct option long_options[] = {
                { "help", no_argument, 0, 'h' },
                { "count", no_argument, 0, 'c' },
+               { "all", no_argument, 0, 'A' },
                { "basename", no_argument, 0, 'b' },
                { "database", required_argument, 0, 'd' },
                { "existing", no_argument, 0, 'e' },
                { "ignore-case", no_argument, 0, 'i' },
                { "limit", required_argument, 0, 'l' },
+               { "literal", no_argument, 0, 'N' },
                { "null", no_argument, 0, '0' },
                { "version", no_argument, 0, 'V' },
                { "regexp", no_argument, 0, 'r' },
@@ -847,17 +857,23 @@ int main(int argc, char **argv)
                { "debug", no_argument, 0, 'D' },  // Not documented.
                // Enable to test cold-cache behavior (except for access()). Not documented.
                { "flush-cache", no_argument, 0, FLUSH_CACHE },
+               // Mostly useful to dump out the entire database, even if the given directories
+               // are gone. Drops sgid for security reasons. Not documented.
+               { "ignore-visibility", no_argument, 0, IGNORE_VISIBILITY },
                { 0, 0, 0, 0 }
        };
 
        setlocale(LC_ALL, "");
        for (;;) {
                int option_index = 0;
-               int c = getopt_long(argc, argv, "bcd:ehil:n:0rwVD", long_options, &option_index);
+               int c = getopt_long(argc, argv, "Abcd:ehil:n:N0rwVD", long_options, &option_index);
                if (c == -1) {
                        break;
                }
                switch (c) {
+               case 'A':
+                       // Ignored.
+                       break;
                case 'b':
                        match_basename = true;
                        break;
@@ -884,6 +900,9 @@ int main(int argc, char **argv)
                                exit(1);
                        }
                        break;
+               case 'N':
+                       literal_printing = true;
+                       break;
                case '0':
                        print_nul = true;
                        break;
@@ -906,17 +925,22 @@ int main(int argc, char **argv)
                case 'V':
                        version();
                        break;
+               case IGNORE_VISIBILITY:
+                       ignore_visibility = true;
+                       break;
                default:
                        exit(1);
                }
        }
 
-       if (use_debug || flush_cache) {
+       if (use_debug || flush_cache || ignore_visibility) {
                // Debug information would leak information about which files exist,
                // so drop setgid before we open the file; one would either need to run
                // as root, or use a locally-built file. Doing the same thing for
                // flush_cache is mostly paranoia, in an attempt to prevent random users
                // from making plocate slow for everyone else.
+               // --ignore-visibility is the obvious case; if we kept sgid with
+               // that flag on, it would subvert the entire security model.
                if (setgid(getgid()) != 0) {
                        perror("setgid");
                        exit(EXIT_FAILURE);
@@ -961,7 +985,7 @@ int main(int argc, char **argv)
        }
        if (needles.empty()) {
                fprintf(stderr, "plocate: no pattern to search for specified\n");
-               exit(0);
+               exit(1);
        }
 
        if (dbpaths.empty()) {
@@ -989,4 +1013,6 @@ int main(int argc, char **argv)
        if (only_count) {
                printf("%" PRId64 "\n", matched);
        }
+
+       return matched == 0;
 }