git.sesse.net Git - plocate/blobdiff - plocate.cpp
Remove dependency on non-POSIX header error.h.
[plocate] / plocate.cpp
index 72dbe287434ac848d705f17b02f58929bdd4b16c..a1cd97a4a4645c08880b014d4799f9d4c33182f6 100644 (file)
@@ -31,6 +31,7 @@
 #include <string.h>
 #include <string>
 #include <string_view>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <thread>
@@ -53,9 +54,11 @@ bool flush_cache = false;
 bool patterns_are_regex = false;
 bool use_extended_regex = false;
 bool match_basename = false;
+bool check_existence = false;
 int64_t limit_matches = numeric_limits<int64_t>::max();
 int64_t limit_left = numeric_limits<int64_t>::max();
 bool stdout_is_tty = false;
+bool literal_printing = false;
 static bool in_forked_child = false;
 
 steady_clock::time_point start;
@@ -63,7 +66,7 @@ ZSTD_DDict *ddict = nullptr;
 
 class Corpus {
 public:
-       Corpus(int fd, IOUringEngine *engine);
+       Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine);
        ~Corpus();
        void find_trigram(uint32_t trgm, function<void(const Trigram *trgmptr, size_t len)> cb);
        void get_compressed_filename_block(uint32_t docid, function<void(string_view)> cb) const;
@@ -81,7 +84,7 @@ public:
        Header hdr;
 };
 
-Corpus::Corpus(int fd, IOUringEngine *engine)
+Corpus::Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine)
        : fd(fd), engine(engine)
 {
        if (flush_cache) {
@@ -95,11 +98,11 @@ Corpus::Corpus(int fd, IOUringEngine *engine)
 
        complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0);
        if (memcmp(hdr.magic, "\0plocate", 8) != 0) {
-               fprintf(stderr, "plocate.db is corrupt or an old version; please rebuild it.\n");
+               fprintf(stderr, "%s: database is corrupt or not a plocate database; please rebuild it.\n", filename_for_errors);
                exit(1);
        }
        if (hdr.version != 0 && hdr.version != 1) {
-               fprintf(stderr, "plocate.db has version %u, expected 0 or 1; please rebuild it.\n", hdr.version);
+               fprintf(stderr, "%s: has version %u, expected 0 or 1; please rebuild it.\n", filename_for_errors, hdr.version);
                exit(1);
        }
        if (hdr.version == 0) {
@@ -152,8 +155,24 @@ size_t Corpus::get_num_filename_blocks() const
        return hdr.num_docids;
 }
 
+template<class T>  // T: callable taking a single bool (it is invoked as cb(bool) below).
+void stat_if_needed(const char *filename, bool access_ok, IOUringEngine *engine, T cb)
+{
+       if (!access_ok || !check_existence) {
+               // Access was denied, or existence checking is off: skip the stat and pass access_ok through unchanged.
+               cb(access_ok);
+       } else if (engine == nullptr || !engine->get_supports_stat()) {
+               // No engine, or it reports no stat support: stat synchronously. lstat() does not follow symlinks, so a dangling symlink still counts as existing.
+               struct stat buf;
+               bool ok = lstat(filename, &buf) == 0;
+               cb(ok);
+       } else {
+               engine->submit_stat(filename, cb);  // Hand the stat and cb to the engine; presumably cb receives the result on completion -- confirm in IOUringEngine.
+       }
+}
+
 void scan_file_block(const vector<Needle> &needles, string_view compressed,
-                     AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer,
+                     IOUringEngine *engine, AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer,
                      atomic<uint64_t> *matched)
 {
        unsigned long long uncompressed_len = ZSTD_getFrameContentSize(compressed.data(), compressed.size());
@@ -182,14 +201,16 @@ void scan_file_block(const vector<Needle> &needles, string_view compressed,
        block[block.size() - 1] = '\0';
 
        auto test_candidate = [&](const char *filename, uint64_t local_seq, uint64_t next_seq) {
-               access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) {
-                       if (ok) {
-                               ++*matched;
-                               serializer->print(local_seq, next_seq - local_seq, filename);
-                       } else {
-                               serializer->print(local_seq, next_seq - local_seq, "");
-                       }
-                       free(filename);
+               access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, engine, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) {
+                       stat_if_needed(filename, ok, engine, [matched, serializer, local_seq, next_seq, filename](bool ok) {
+                               if (ok) {
+                                       ++*matched;
+                                       serializer->print(local_seq, next_seq - local_seq, filename);
+                               } else {
+                                       serializer->print(local_seq, next_seq - local_seq, "");
+                               }
+                               free(filename);
+                       });
                });
        };
 
@@ -240,8 +261,8 @@ size_t scan_docids(const vector<Needle> &needles, const vector<uint32_t> &docids
        atomic<uint64_t> matched{ 0 };
        for (size_t i = 0; i < docids.size(); ++i) {
                uint32_t docid = docids[i];
-               corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, &docids_in_order](string_view compressed) {
-                       scan_file_block(needles, compressed, &access_rx_cache, i, &docids_in_order, &matched);
+               corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, engine, &docids_in_order](string_view compressed) {
+                       scan_file_block(needles, compressed, engine, &access_rx_cache, i, &docids_in_order, &matched);
                });
        }
        engine->finish();
@@ -359,7 +380,8 @@ uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &co
                                for (uint32_t docid = io_docid; docid < last_docid; ++docid) {
                                        size_t relative_offset = offsets[docid] - offsets[io_docid];
                                        size_t len = offsets[docid + 1] - offsets[docid];
-                                       scan_file_block(*use_needles, { &compressed[relative_offset], len }, &access_rx_cache, docid, &receiver, &matched);
+                                       // IOUringEngine isn't thread-safe, so we do any needed stat()s synchronously (nullptr engine).
+                                       scan_file_block(*use_needles, { &compressed[relative_offset], len }, /*engine=*/nullptr, &access_rx_cache, docid, &receiver, &matched);
                                }
                        }
                });
@@ -467,7 +489,7 @@ uint64_t do_search_file(const vector<Needle> &needles, const std::string &filena
        }
 
        IOUringEngine engine(/*slop_bytes=*/16);  // 16 slop bytes as described in turbopfor.h.
-       Corpus corpus(fd, &engine);
+       Corpus corpus(fd, filename.c_str(), &engine);
        dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
 
        vector<TrigramDisjunction> trigram_groups;
@@ -788,6 +810,7 @@ void usage()
                "  -i, --ignore-case      search case-insensitively\n"
                "  -l, --limit LIMIT      stop after LIMIT matches\n"
                "  -0, --null             delimit matches by NUL instead of newline\n"
+               "  -N, --literal          do not quote filenames, even if printing to a tty\n"
                "  -r, --regexp           interpret patterns as basic regexps (slow)\n"
                "      --regex            interpret patterns as extended regexps (slow)\n"
                "  -w, --wholename        search the entire path name (default; see -b)\n"
@@ -814,10 +837,13 @@ int main(int argc, char **argv)
        static const struct option long_options[] = {
                { "help", no_argument, 0, 'h' },
                { "count", no_argument, 0, 'c' },
+               { "all", no_argument, 0, 'A' },
                { "basename", no_argument, 0, 'b' },
                { "database", required_argument, 0, 'd' },
+               { "existing", no_argument, 0, 'e' },
                { "ignore-case", no_argument, 0, 'i' },
                { "limit", required_argument, 0, 'l' },
+               { "literal", no_argument, 0, 'N' },
                { "null", no_argument, 0, '0' },
                { "version", no_argument, 0, 'V' },
                { "regexp", no_argument, 0, 'r' },
@@ -832,11 +858,14 @@ int main(int argc, char **argv)
        setlocale(LC_ALL, "");
        for (;;) {
                int option_index = 0;
-               int c = getopt_long(argc, argv, "bcd:hil:n:0rwVD", long_options, &option_index);
+               int c = getopt_long(argc, argv, "Abcd:ehil:n:N0rwVD", long_options, &option_index);
                if (c == -1) {
                        break;
                }
                switch (c) {
+               case 'A':
+                       // Ignored.
+                       break;
                case 'b':
                        match_basename = true;
                        break;
@@ -846,6 +875,9 @@ int main(int argc, char **argv)
                case 'd':
                        parse_dbpaths(optarg, &dbpaths);
                        break;
+               case 'e':
+                       check_existence = true;
+                       break;
                case 'h':
                        usage();
                        exit(0);
@@ -860,6 +892,9 @@ int main(int argc, char **argv)
                                exit(1);
                        }
                        break;
+               case 'N':
+                       literal_printing = true;
+                       break;
                case '0':
                        print_nul = true;
                        break;