]> git.sesse.net Git - plocate/blob - plocate.cpp
Add an undocumented flag --ignore-visibility.
[plocate] / plocate.cpp
1 #include "access_rx_cache.h"
2 #include "complete_pread.h"
3 #include "db.h"
4 #include "dprintf.h"
5 #include "io_uring_engine.h"
6 #include "needle.h"
7 #include "parse_trigrams.h"
8 #include "serializer.h"
9 #include "turbopfor.h"
10 #include "unique_sort.h"
11
12 #include <algorithm>
13 #include <assert.h>
14 #include <atomic>
15 #include <chrono>
16 #include <condition_variable>
17 #include <deque>
18 #include <fcntl.h>
19 #include <functional>
20 #include <getopt.h>
21 #include <inttypes.h>
22 #include <iterator>
23 #include <limits>
24 #include <locale.h>
25 #include <memory>
26 #include <mutex>
27 #include <regex.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <string>
33 #include <string_view>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <thread>
38 #include <tuple>
39 #include <unistd.h>
40 #include <unordered_map>
41 #include <unordered_set>
42 #include <utility>
43 #include <vector>
44 #include <zstd.h>
45
46 using namespace std;
47 using namespace std::chrono;
48
49 bool ignore_case = false;
50 bool only_count = false;
51 bool print_nul = false;
52 bool use_debug = false;
53 bool flush_cache = false;
54 bool patterns_are_regex = false;
55 bool use_extended_regex = false;
56 bool match_basename = false;
57 bool check_existence = false;
58 bool ignore_visibility = false;
59 int64_t limit_matches = numeric_limits<int64_t>::max();
60 int64_t limit_left = numeric_limits<int64_t>::max();
61 bool stdout_is_tty = false;
62 bool literal_printing = false;
63 static bool in_forked_child = false;
64
65 steady_clock::time_point start;
66 ZSTD_DDict *ddict = nullptr;
67
68 class Corpus {
69 public:
70         Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine);
71         ~Corpus();
72         void find_trigram(uint32_t trgm, function<void(const Trigram *trgmptr, size_t len)> cb);
73         void get_compressed_filename_block(uint32_t docid, function<void(string_view)> cb) const;
74         size_t get_num_filename_blocks() const;
75         off_t offset_for_block(uint32_t docid) const
76         {
77                 return hdr.filename_index_offset_bytes + docid * sizeof(uint64_t);
78         }
79         const Header &get_hdr() const { return hdr; }
80
81 public:
82         const int fd;
83         IOUringEngine *const engine;
84
85         Header hdr;
86 };
87
88 Corpus::Corpus(int fd, const char *filename_for_errors, IOUringEngine *engine)
89         : fd(fd), engine(engine)
90 {
91         if (flush_cache) {
92                 off_t len = lseek(fd, 0, SEEK_END);
93                 if (len == -1) {
94                         perror("lseek");
95                         exit(1);
96                 }
97                 posix_fadvise(fd, 0, len, POSIX_FADV_DONTNEED);
98         }
99
100         complete_pread(fd, &hdr, sizeof(hdr), /*offset=*/0);
101         if (memcmp(hdr.magic, "\0plocate", 8) != 0) {
102                 fprintf(stderr, "%s: database is corrupt or not a plocate database; please rebuild it.\n", filename_for_errors);
103                 exit(1);
104         }
105         if (hdr.version != 0 && hdr.version != 1) {
106                 fprintf(stderr, "%s: has version %u, expected 0 or 1; please rebuild it.\n", filename_for_errors, hdr.version);
107                 exit(1);
108         }
109         if (hdr.version == 0) {
110                 // These will be junk data.
111                 hdr.zstd_dictionary_offset_bytes = 0;
112                 hdr.zstd_dictionary_length_bytes = 0;
113         }
114         if (hdr.max_version < 2) {
115                 // This too. (We ignore the other max_version 2 fields.)
116                 hdr.check_visibility = true;
117         }
118         if (ignore_visibility) {
119                 hdr.check_visibility = false;
120         }
121 }
122
123 Corpus::~Corpus()
124 {
125         close(fd);
126 }
127
128 void Corpus::find_trigram(uint32_t trgm, function<void(const Trigram *trgmptr, size_t len)> cb)
129 {
130         uint32_t bucket = hash_trigram(trgm, hdr.hashtable_size);
131         engine->submit_read(fd, sizeof(Trigram) * (hdr.extra_ht_slots + 2), hdr.hash_table_offset_bytes + sizeof(Trigram) * bucket, [this, trgm, cb{ move(cb) }](string_view s) {
132                 const Trigram *trgmptr = reinterpret_cast<const Trigram *>(s.data());
133                 for (unsigned i = 0; i < hdr.extra_ht_slots + 1; ++i) {
134                         if (trgmptr[i].trgm == trgm) {
135                                 cb(trgmptr + i, trgmptr[i + 1].offset - trgmptr[i].offset);
136                                 return;
137                         }
138                 }
139
140                 // Not found.
141                 cb(nullptr, 0);
142         });
143 }
144
145 void Corpus::get_compressed_filename_block(uint32_t docid, function<void(string_view)> cb) const
146 {
147         // Read the file offset from this docid and the next one.
148         // This is always allowed, since we have a sentinel block at the end.
149         engine->submit_read(fd, sizeof(uint64_t) * 2, offset_for_block(docid), [this, cb{ move(cb) }](string_view s) {
150                 const uint64_t *ptr = reinterpret_cast<const uint64_t *>(s.data());
151                 off_t offset = ptr[0];
152                 size_t len = ptr[1] - ptr[0];
153                 engine->submit_read(fd, len, offset, cb);
154         });
155 }
156
157 size_t Corpus::get_num_filename_blocks() const
158 {
159         return hdr.num_docids;
160 }
161
162 template<class T>
163 void stat_if_needed(const char *filename, bool access_ok, IOUringEngine *engine, T cb)
164 {
165         if (!access_ok || !check_existence) {
166                 // Doesn't have access or doesn't care about existence, so no need to stat.
167                 cb(access_ok);
168         } else if (engine == nullptr || !engine->get_supports_stat()) {
169                 // Do a synchronous stat.
170                 struct stat buf;
171                 bool ok = lstat(filename, &buf) == 0;
172                 cb(ok);
173         } else {
174                 engine->submit_stat(filename, cb);
175         }
176 }
177
178 void scan_file_block(const vector<Needle> &needles, string_view compressed,
179                      IOUringEngine *engine, AccessRXCache *access_rx_cache, uint64_t seq, ResultReceiver *serializer,
180                      atomic<uint64_t> *matched)
181 {
182         unsigned long long uncompressed_len = ZSTD_getFrameContentSize(compressed.data(), compressed.size());
183         if (uncompressed_len == ZSTD_CONTENTSIZE_UNKNOWN || uncompressed_len == ZSTD_CONTENTSIZE_ERROR) {
184                 fprintf(stderr, "ZSTD_getFrameContentSize() failed\n");
185                 exit(1);
186         }
187
188         string block;
189         block.resize(uncompressed_len + 1);
190
191         static thread_local ZSTD_DCtx *ctx = ZSTD_createDCtx();  // Reused across calls.
192         size_t err;
193
194         if (ddict != nullptr) {
195                 err = ZSTD_decompress_usingDDict(ctx, &block[0], block.size(), compressed.data(),
196                                                  compressed.size(), ddict);
197         } else {
198                 err = ZSTD_decompressDCtx(ctx, &block[0], block.size(), compressed.data(),
199                                           compressed.size());
200         }
201         if (ZSTD_isError(err)) {
202                 fprintf(stderr, "ZSTD_decompress(): %s\n", ZSTD_getErrorName(err));
203                 exit(1);
204         }
205         block[block.size() - 1] = '\0';
206
207         auto test_candidate = [&](const char *filename, uint64_t local_seq, uint64_t next_seq) {
208                 access_rx_cache->check_access(filename, /*allow_async=*/true, [matched, engine, serializer, local_seq, next_seq, filename{ strdup(filename) }](bool ok) {
209                         stat_if_needed(filename, ok, engine, [matched, serializer, local_seq, next_seq, filename](bool ok) {
210                                 if (ok) {
211                                         ++*matched;
212                                         serializer->print(local_seq, next_seq - local_seq, filename);
213                                 } else {
214                                         serializer->print(local_seq, next_seq - local_seq, "");
215                                 }
216                                 free(filename);
217                         });
218                 });
219         };
220
221         // We need to know the next sequence number before inserting into Serializer,
222         // so always buffer one candidate.
223         const char *pending_candidate = nullptr;
224
225         uint64_t local_seq = seq << 32;
226         for (const char *filename = block.data();
227              filename != block.data() + block.size();
228              filename += strlen(filename) + 1) {
229                 const char *haystack = filename;
230                 if (match_basename) {
231                         haystack = strrchr(filename, '/');
232                         if (haystack == nullptr) {
233                                 haystack = filename;
234                         } else {
235                                 ++haystack;
236                         }
237                 }
238
239                 bool found = true;
240                 for (const Needle &needle : needles) {
241                         if (!matches(needle, haystack)) {
242                                 found = false;
243                                 break;
244                         }
245                 }
246                 if (found) {
247                         if (pending_candidate != nullptr) {
248                                 test_candidate(pending_candidate, local_seq, local_seq + 1);
249                                 ++local_seq;
250                         }
251                         pending_candidate = filename;
252                 }
253         }
254         if (pending_candidate == nullptr) {
255                 serializer->print(seq << 32, 1ULL << 32, "");
256         } else {
257                 test_candidate(pending_candidate, local_seq, (seq + 1) << 32);
258         }
259 }
260
261 size_t scan_docids(const vector<Needle> &needles, const vector<uint32_t> &docids, const Corpus &corpus, IOUringEngine *engine)
262 {
263         Serializer docids_in_order;
264         AccessRXCache access_rx_cache(engine, corpus.get_hdr().check_visibility);
265         atomic<uint64_t> matched{ 0 };
266         for (size_t i = 0; i < docids.size(); ++i) {
267                 uint32_t docid = docids[i];
268                 corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, engine, &docids_in_order](string_view compressed) {
269                         scan_file_block(needles, compressed, engine, &access_rx_cache, i, &docids_in_order, &matched);
270                 });
271         }
272         engine->finish();
273         return matched;
274 }
275
276 struct WorkerThread {
277         thread t;
278
279         // We use a result queue instead of synchronizing Serializer,
280         // since a lock on it becomes a huge choke point if there are
281         // lots of threads.
282         mutex result_mu;
283         struct Result {
284                 uint64_t seq;
285                 uint64_t skip;
286                 string msg;
287         };
288         vector<Result> results;
289 };
290
291 class WorkerThreadReceiver : public ResultReceiver {
292 public:
293         WorkerThreadReceiver(WorkerThread *wt)
294                 : wt(wt) {}
295
296         void print(uint64_t seq, uint64_t skip, const string msg) override
297         {
298                 lock_guard<mutex> lock(wt->result_mu);
299                 if (msg.empty() && !wt->results.empty() && wt->results.back().seq + wt->results.back().skip == seq) {
300                         wt->results.back().skip += skip;
301                 } else {
302                         wt->results.emplace_back(WorkerThread::Result{ seq, skip, move(msg) });
303                 }
304         }
305
306 private:
307         WorkerThread *wt;
308 };
309
310 void deliver_results(WorkerThread *wt, Serializer *serializer)
311 {
312         vector<WorkerThread::Result> results;
313         {
314                 lock_guard<mutex> lock(wt->result_mu);
315                 results = move(wt->results);
316         }
317         for (const WorkerThread::Result &result : results) {
318                 serializer->print(result.seq, result.skip, move(result.msg));
319         }
320 }
321
322 // We do this sequentially, as it's faster than scattering
323 // a lot of I/O through io_uring and hoping the kernel will
324 // coalesce it plus readahead for us. Since we assume that
325 // we will primarily be CPU-bound, we'll be firing up one
326 // worker thread for each spare core (the last one will
327 // only be doing I/O). access() is still synchronous.
328 uint64_t scan_all_docids(const vector<Needle> &needles, int fd, const Corpus &corpus)
329 {
330         {
331                 const Header &hdr = corpus.get_hdr();
332                 if (hdr.zstd_dictionary_length_bytes > 0) {
333                         string dictionary;
334                         dictionary.resize(hdr.zstd_dictionary_length_bytes);
335                         complete_pread(fd, &dictionary[0], hdr.zstd_dictionary_length_bytes, hdr.zstd_dictionary_offset_bytes);
336                         ddict = ZSTD_createDDict(dictionary.data(), dictionary.size());
337                 }
338         }
339
340         AccessRXCache access_rx_cache(nullptr, corpus.get_hdr().check_visibility);
341         Serializer serializer;
342         uint32_t num_blocks = corpus.get_num_filename_blocks();
343         unique_ptr<uint64_t[]> offsets(new uint64_t[num_blocks + 1]);
344         complete_pread(fd, offsets.get(), (num_blocks + 1) * sizeof(uint64_t), corpus.offset_for_block(0));
345         atomic<uint64_t> matched{ 0 };
346
347         mutex mu;
348         condition_variable queue_added, queue_removed;
349         deque<tuple<int, int, string>> work_queue;  // Under mu.
350         bool done = false;  // Under mu.
351
352         unsigned num_threads = max<int>(sysconf(_SC_NPROCESSORS_ONLN) - 1, 1);
353         dprintf("Using %u worker threads for linear scan.\n", num_threads);
354         unique_ptr<WorkerThread[]> threads(new WorkerThread[num_threads]);
355         for (unsigned i = 0; i < num_threads; ++i) {
356                 threads[i].t = thread([&threads, &mu, &queue_added, &queue_removed, &work_queue, &done, &offsets, &needles, &access_rx_cache, &matched, i] {
357                         // regcomp() takes a lock on the regex, so each thread will need its own.
358                         const vector<Needle> *use_needles = &needles;
359                         vector<Needle> recompiled_needles;
360                         if (i != 0 && patterns_are_regex) {
361                                 recompiled_needles = needles;
362                                 for (Needle &needle : recompiled_needles) {
363                                         needle.re = compile_regex(needle.str);
364                                 }
365                                 use_needles = &recompiled_needles;
366                         }
367
368                         WorkerThreadReceiver receiver(&threads[i]);
369                         for (;;) {
370                                 uint32_t io_docid, last_docid;
371                                 string compressed;
372
373                                 {
374                                         unique_lock lock(mu);
375                                         queue_added.wait(lock, [&work_queue, &done] { return !work_queue.empty() || done; });
376                                         if (done && work_queue.empty()) {
377                                                 return;
378                                         }
379                                         tie(io_docid, last_docid, compressed) = move(work_queue.front());
380                                         work_queue.pop_front();
381                                         queue_removed.notify_all();
382                                 }
383
384                                 for (uint32_t docid = io_docid; docid < last_docid; ++docid) {
385                                         size_t relative_offset = offsets[docid] - offsets[io_docid];
386                                         size_t len = offsets[docid + 1] - offsets[docid];
387                                         // IOUringEngine isn't thread-safe, so we do any needed stat()s synchronously (nullptr engine).
388                                         scan_file_block(*use_needles, { &compressed[relative_offset], len }, /*engine=*/nullptr, &access_rx_cache, docid, &receiver, &matched);
389                                 }
390                         }
391                 });
392         }
393
394         string compressed;
395         for (uint32_t io_docid = 0; io_docid < num_blocks; io_docid += 32) {
396                 uint32_t last_docid = std::min(io_docid + 32, num_blocks);
397                 size_t io_len = offsets[last_docid] - offsets[io_docid];
398                 if (compressed.size() < io_len) {
399                         compressed.resize(io_len);
400                 }
401                 complete_pread(fd, &compressed[0], io_len, offsets[io_docid]);
402
403                 {
404                         unique_lock lock(mu);
405                         queue_removed.wait(lock, [&work_queue] { return work_queue.size() < 256; });  // Allow ~2MB of data queued up.
406                         work_queue.emplace_back(io_docid, last_docid, move(compressed));
407                         queue_added.notify_one();  // Avoid the thundering herd.
408                 }
409
410                 // Pick up some results, so that we are sure that we won't just overload.
411                 // (Seemingly, going through all of these causes slowness with many threads,
412                 // but taking only one is OK.)
413                 unsigned i = io_docid / 32;
414                 deliver_results(&threads[i % num_threads], &serializer);
415         }
416         {
417                 lock_guard<mutex> lock(mu);
418                 done = true;
419                 queue_added.notify_all();
420         }
421         for (unsigned i = 0; i < num_threads; ++i) {
422                 threads[i].t.join();
423                 deliver_results(&threads[i], &serializer);
424         }
425         return matched;
426 }
427
428 // Takes the given posting list, unions it into the parts of the trigram disjunction
429 // already read; if the list is complete, intersects with “cur_candidates”.
430 //
431 // Returns true if the search should be aborted (we are done).
432 bool new_posting_list_read(TrigramDisjunction *td, vector<uint32_t> decoded, vector<uint32_t> *cur_candidates, vector<uint32_t> *tmp)
433 {
434         if (td->docids.empty()) {
435                 td->docids = move(decoded);
436         } else {
437                 tmp->clear();
438                 set_union(decoded.begin(), decoded.end(), td->docids.begin(), td->docids.end(), back_inserter(*tmp));
439                 swap(*tmp, td->docids);
440         }
441         if (--td->remaining_trigrams_to_read > 0) {
442                 // Need to wait for more.
443                 if (ignore_case) {
444                         dprintf("  ... %u reads left in OR group %u (%zu docids in list)\n",
445                                 td->remaining_trigrams_to_read, td->index, td->docids.size());
446                 }
447                 return false;
448         }
449         if (cur_candidates->empty()) {
450                 if (ignore_case) {
451                         dprintf("  ... all reads done for OR group %u (%zu docids)\n",
452                                 td->index, td->docids.size());
453                 }
454                 *cur_candidates = move(td->docids);
455         } else {
456                 tmp->clear();
457                 set_intersection(cur_candidates->begin(), cur_candidates->end(),
458                                  td->docids.begin(), td->docids.end(),
459                                  back_inserter(*tmp));
460                 swap(*cur_candidates, *tmp);
461                 if (ignore_case) {
462                         if (cur_candidates->empty()) {
463                                 dprintf("  ... all reads done for OR group %u (%zu docids), intersected (none left, search is done)\n",
464                                         td->index, td->docids.size());
465                                 return true;
466                         } else {
467                                 dprintf("  ... all reads done for OR group %u (%zu docids), intersected (%zu left)\n",
468                                         td->index, td->docids.size(), cur_candidates->size());
469                         }
470                 }
471         }
472         return false;
473 }
474
475 uint64_t do_search_file(const vector<Needle> &needles, const std::string &filename)
476 {
477         int fd = open(filename.c_str(), O_RDONLY);
478         if (fd == -1) {
479                 perror(filename.c_str());
480                 exit(1);
481         }
482
483         // Drop privileges.
484         if (setgid(getgid()) != 0) {
485                 perror("setgid");
486                 exit(EXIT_FAILURE);
487         }
488
489         start = steady_clock::now();
490         if (access("/", R_OK | X_OK)) {
491                 // We can't find anything, no need to bother...
492                 return 0;
493         }
494
495         IOUringEngine engine(/*slop_bytes=*/16);  // 16 slop bytes as described in turbopfor.h.
496         Corpus corpus(fd, filename.c_str(), &engine);
497         dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
498
499         vector<TrigramDisjunction> trigram_groups;
500         if (patterns_are_regex) {
501                 // We could parse the regex to find trigrams that have to be there
502                 // (there are actually known algorithms to deal with disjunctions
503                 // and such, too), but for now, we just go brute force.
504                 // Using locate with regexes is pretty niche.
505         } else {
506                 for (const Needle &needle : needles) {
507                         parse_trigrams(needle.str, ignore_case, &trigram_groups);
508                 }
509         }
510
511         unique_sort(
512                 &trigram_groups,
513                 [](const TrigramDisjunction &a, const TrigramDisjunction &b) { return a.trigram_alternatives < b.trigram_alternatives; },
514                 [](const TrigramDisjunction &a, const TrigramDisjunction &b) { return a.trigram_alternatives == b.trigram_alternatives; });
515
516         // Give them names for debugging.
517         unsigned td_index = 0;
518         for (TrigramDisjunction &td : trigram_groups) {
519                 td.index = td_index++;
520         }
521
522         // Collect which trigrams we need to look up in the hash table.
523         unordered_map<uint32_t, vector<TrigramDisjunction *>> trigrams_to_lookup;
524         for (TrigramDisjunction &td : trigram_groups) {
525                 for (uint32_t trgm : td.trigram_alternatives) {
526                         trigrams_to_lookup[trgm].push_back(&td);
527                 }
528         }
529         if (trigrams_to_lookup.empty()) {
530                 // Too short for trigram matching. Apply brute force.
531                 // (We could have searched through all trigrams that matched
532                 // the pattern and done a union of them, but that's a lot of
533                 // work for fairly unclear gain.)
534                 uint64_t matched = scan_all_docids(needles, fd, corpus);
535                 dprintf("Done in %.1f ms, found %" PRId64 " matches.\n",
536                         1e3 * duration<float>(steady_clock::now() - start).count(), matched);
537                 return matched;
538         }
539
540         // Sneak in fetching the dictionary, if present. It's not necessarily clear
541         // exactly where it would be cheapest to get it, but it needs to be present
542         // before we can decode any of the posting lists. Most likely, it's
543         // in the same filesystem block as the header anyway, so it should be
544         // present in the cache.
545         {
546                 const Header &hdr = corpus.get_hdr();
547                 if (hdr.zstd_dictionary_length_bytes > 0) {
548                         engine.submit_read(fd, hdr.zstd_dictionary_length_bytes, hdr.zstd_dictionary_offset_bytes, [](string_view s) {
549                                 ddict = ZSTD_createDDict(s.data(), s.size());
550                                 dprintf("Dictionary initialized after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
551                         });
552                 }
553         }
554
555         // Look them all up on disk.
556         bool should_early_exit = false;
557         for (auto &[trgm, trigram_groups] : trigrams_to_lookup) {
558                 corpus.find_trigram(trgm, [trgm{ trgm }, trigram_groups{ &trigram_groups }, &should_early_exit](const Trigram *trgmptr, size_t len) {
559                         if (trgmptr == nullptr) {
560                                 dprintf("trigram %s isn't found\n", print_trigram(trgm).c_str());
561                                 for (TrigramDisjunction *td : *trigram_groups) {
562                                         --td->remaining_trigrams_to_read;
563
564                                         // If we now know this trigram group doesn't match anything at all,
565                                         // we can do early exit; however, if we're in a forked child,
566                                         // that would confuse the parent process (since we don't write
567                                         // our count to the pipe), so we wait until we're back in to the
568                                         // regular (non-async) context. This is a fairly rare case anyway,
569                                         // and the gains from dropping the remaining trigram reads are limited.
570                                         if (td->remaining_trigrams_to_read == 0 && td->read_trigrams.empty()) {
571                                                 if (in_forked_child) {
572                                                         should_early_exit = true;
573                                                 } else {
574                                                         dprintf("zero matches in %s, so we are done\n", print_td(*td).c_str());
575                                                         if (only_count) {
576                                                                 printf("0\n");
577                                                         }
578                                                         exit(1);
579                                                 }
580                                         }
581                                 }
582                                 return;
583                         }
584                         for (TrigramDisjunction *td : *trigram_groups) {
585                                 --td->remaining_trigrams_to_read;
586                                 td->max_num_docids += trgmptr->num_docids;
587                                 td->read_trigrams.emplace_back(*trgmptr, len);
588                         }
589                 });
590         }
591         engine.finish();
592         dprintf("Hashtable lookups done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
593
594         if (should_early_exit) {
595                 return 0;
596         }
597
598         for (TrigramDisjunction &td : trigram_groups) {
599                 // Reset for reads.
600                 td.remaining_trigrams_to_read = td.read_trigrams.size();
601
602                 if (ignore_case) {  // If case-sensitive, they'll all be pretty obvious single-entry groups.
603                         dprintf("OR group %u (max_num_docids=%u): %s\n", td.index, td.max_num_docids, print_td(td).c_str());
604                 }
605         }
606
607         // TODO: For case-insensitive (ie. more than one alternative in each),
608         // prioritize the ones with fewer seeks?
609         sort(trigram_groups.begin(), trigram_groups.end(),
610              [&](const TrigramDisjunction &a, const TrigramDisjunction &b) {
611                      return a.max_num_docids < b.max_num_docids;
612              });
613
614         unordered_map<uint32_t, vector<TrigramDisjunction *>> uses_trigram;
615         for (TrigramDisjunction &td : trigram_groups) {
616                 for (uint32_t trgm : td.trigram_alternatives) {
617                         uses_trigram[trgm].push_back(&td);
618                 }
619         }
620
621         unordered_set<uint32_t> trigrams_submitted_read;
622         vector<uint32_t> cur_candidates, tmp, decoded;
623         bool done = false;
624         for (TrigramDisjunction &td : trigram_groups) {
625                 if (!cur_candidates.empty() && td.max_num_docids > cur_candidates.size() * 100) {
626                         dprintf("%s has up to %u entries, ignoring the rest (will "
627                                 "weed out false positives later)\n",
628                                 print_td(td).c_str(), td.max_num_docids);
629                         break;
630                 }
631
632                 for (auto &[trgmptr, len] : td.read_trigrams) {
633                         if (trigrams_submitted_read.count(trgmptr.trgm) != 0) {
634                                 continue;
635                         }
636                         trigrams_submitted_read.insert(trgmptr.trgm);
637                         // Only stay a certain amount ahead, so that we don't spend I/O
638                         // on reading the latter, large posting lists. We are unlikely
639                         // to need them anyway, even if they should come in first.
640                         if (engine.get_waiting_reads() >= 5) {
641                                 engine.finish();
642                                 if (done)
643                                         break;
644                         }
645                         engine.submit_read(fd, len, trgmptr.offset, [trgmptr{ trgmptr }, len{ len }, &done, &cur_candidates, &tmp, &decoded, &uses_trigram](string_view s) {
646                                 if (done)
647                                         return;
648
649                                 uint32_t trgm = trgmptr.trgm;
650                                 const unsigned char *pldata = reinterpret_cast<const unsigned char *>(s.data());
651                                 size_t num = trgmptr.num_docids;
652                                 decoded.resize(num);
653                                 decode_pfor_delta1_128(pldata, num, /*interleaved=*/true, &decoded[0]);
654
655                                 assert(uses_trigram.count(trgm) != 0);
656                                 bool was_empty = cur_candidates.empty();
657                                 if (ignore_case) {
658                                         dprintf("trigram %s (%zu bytes) decoded to %zu entries\n", print_trigram(trgm).c_str(), len, num);
659                                 }
660
661                                 for (TrigramDisjunction *td : uses_trigram[trgm]) {
662                                         done |= new_posting_list_read(td, decoded, &cur_candidates, &tmp);
663                                         if (done)
664                                                 break;
665                                 }
666                                 if (!ignore_case) {
667                                         if (was_empty) {
668                                                 dprintf("trigram %s (%zu bytes) decoded to %zu entries\n", print_trigram(trgm).c_str(), len, num);
669                                         } else if (cur_candidates.empty()) {
670                                                 dprintf("trigram %s (%zu bytes) decoded to %zu entries (none left, search is done)\n", print_trigram(trgm).c_str(), len, num);
671                                         } else {
672                                                 dprintf("trigram %s (%zu bytes) decoded to %zu entries (%zu left)\n", print_trigram(trgm).c_str(), len, num, cur_candidates.size());
673                                         }
674                                 }
675                         });
676                 }
677         }
678         engine.finish();
679         if (done) {
680                 return 0;
681         }
682         dprintf("Intersection done after %.1f ms. Doing final verification and printing:\n",
683                 1e3 * duration<float>(steady_clock::now() - start).count());
684
685         uint64_t matched = scan_docids(needles, cur_candidates, corpus, &engine);
686         dprintf("Done in %.1f ms, found %" PRId64 " matches.\n",
687                 1e3 * duration<float>(steady_clock::now() - start).count(), matched);
688         return matched;
689 }
690
691 // Run do_search_file() in a child process.
692 //
693 // The reason for this is that we're not robust against malicious input, so we need
694 // to drop privileges after opening the file. (Otherwise, we could fall prey to an attack
695 // where a user does locate -d badfile.db:/var/lib/plocate/plocate.db, badfile.db contains
696 // a buffer overflow that takes over the process, and then uses the elevated privileges
697 // to print out otherwise inaccessible paths.) We solve this by forking and treating the
698 // child process as untrusted after it has dropped its privileges (which it does before
699 // reading any data from the file); it returns a single 64-bit number over a pipe,
700 // and that's it. The parent keeps its privileges, and can then fork out new children
701 // without fear of being taken over. (The child keeps stdout for outputting results.)
702 //
703 // The count is returned over the pipe, because it's needed both for --limit and --count.
704 uint64_t do_search_file_in_child(const vector<Needle> &needles, const std::string &filename)
705 {
706         int pipefd[2];
707         if (pipe(pipefd) == -1) {
708                 perror("pipe");
709                 exit(EXIT_FAILURE);
710         }
711
712         pid_t child_pid = fork();
713         switch (child_pid) {
714         case 0: {
715                 // Child.
716                 close(pipefd[0]);
717                 in_forked_child = true;
718                 uint64_t matched = do_search_file(needles, filename);
719                 int ret;
720                 do {
721                         ret = write(pipefd[1], &matched, sizeof(matched));
722                 } while (ret == -1 && errno == EINTR);
723                 if (ret != sizeof(matched)) {
724                         perror("write");
725                         _exit(EXIT_FAILURE);
726                 }
727                 fflush(stdout);
728                 _exit(EXIT_SUCCESS);
729         }
730         case -1:
731                 // Error.
732                 perror("fork");
733                 exit(EXIT_FAILURE);
734         default:
735                 // Parent.
736                 close(pipefd[1]);
737                 break;
738         }
739
740         // Wait for the child to finish.
741         int wstatus;
742         pid_t err;
743         do {
744                 err = waitpid(child_pid, &wstatus, 0);
745         } while (err == -1 && errno == EINTR);
746         if (err == -1) {
747                 perror("waitpid");
748                 exit(EXIT_FAILURE);
749         }
750         if (WIFEXITED(wstatus)) {
751                 if (WEXITSTATUS(wstatus) != 0) {
752                         // The child has probably already printed out its error, so just propagate the exit status.
753                         exit(WEXITSTATUS(wstatus));
754                 }
755                 // Success!
756         } else if (!WIFEXITED(wstatus)) {
757                 fprintf(stderr, "FATAL: Child died unexpectedly while processing %s\n", filename.c_str());
758                 exit(1);
759         }
760
761         // Now get the number of matches from the child.
762         uint64_t matched;
763         int ret;
764         do {
765                 ret = read(pipefd[0], &matched, sizeof(matched));
766         } while (ret == -1 && errno == EINTR);
767         if (ret == -1) {
768                 perror("read");
769                 exit(EXIT_FAILURE);
770         } else if (ret != sizeof(matched)) {
771                 fprintf(stderr, "FATAL: Short read through pipe (got %d bytes)\n", ret);
772                 exit(EXIT_FAILURE);
773         }
774         close(pipefd[0]);
775         return matched;
776 }
777
778 // Parses a colon-separated list of strings and appends them onto the given vector.
779 // Backslash escapes whatever comes after it.
780 void parse_dbpaths(const char *ptr, vector<string> *output)
781 {
782         string str;
783         while (*ptr != '\0') {
784                 if (*ptr == '\\') {
785                         if (ptr[1] == '\0') {
786                                 fprintf(stderr, "ERROR: Escape character at the end of string\n");
787                                 exit(EXIT_FAILURE);
788                         }
789                         // Escape.
790                         str.push_back(ptr[1]);
791                         ptr += 2;
792                         continue;
793                 }
794                 if (*ptr == ':') {
795                         // Separator.
796                         output->push_back(move(str));
797                         ++ptr;
798                         continue;
799                 }
800                 str.push_back(*ptr++);
801         }
802         output->push_back(move(str));
803 }
804
805 void usage()
806 {
807         printf(
808                 "Usage: plocate [OPTION]... PATTERN...\n"
809                 "\n"
810                 "  -b, --basename         search only the file name portion of path names\n"
811                 "  -c, --count            print number of matches instead of the matches\n"
812                 "  -d, --database DBPATH  search for files in DBPATH\n"
813                 "                         (default is " DBFILE ")\n"
814                 "  -i, --ignore-case      search case-insensitively\n"
815                 "  -l, --limit LIMIT      stop after LIMIT matches\n"
816                 "  -0, --null             delimit matches by NUL instead of newline\n"
817                 "  -N, --literal          do not quote filenames, even if printing to a tty\n"
818                 "  -r, --regexp           interpret patterns as basic regexps (slow)\n"
819                 "      --regex            interpret patterns as extended regexps (slow)\n"
820                 "  -w, --wholename        search the entire path name (default; see -b)\n"
821                 "      --help             print this help\n"
822                 "      --version          print version information\n");
823 }
824
825 void version()
826 {
827         printf("%s %s\n", PACKAGE_NAME, PACKAGE_VERSION);
828         printf("Copyright 2020 Steinar H. Gunderson\n");
829         printf("License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>.\n");
830         printf("This is free software: you are free to change and redistribute it.\n");
831         printf("There is NO WARRANTY, to the extent permitted by law.\n");
832         exit(0);
833 }
834
835 int main(int argc, char **argv)
836 {
837         vector<string> dbpaths;
838
839         constexpr int EXTENDED_REGEX = 1000;
840         constexpr int FLUSH_CACHE = 1001;
841         constexpr int IGNORE_VISIBILITY = 1002;
842         static const struct option long_options[] = {
843                 { "help", no_argument, 0, 'h' },
844                 { "count", no_argument, 0, 'c' },
845                 { "all", no_argument, 0, 'A' },
846                 { "basename", no_argument, 0, 'b' },
847                 { "database", required_argument, 0, 'd' },
848                 { "existing", no_argument, 0, 'e' },
849                 { "ignore-case", no_argument, 0, 'i' },
850                 { "limit", required_argument, 0, 'l' },
851                 { "literal", no_argument, 0, 'N' },
852                 { "null", no_argument, 0, '0' },
853                 { "version", no_argument, 0, 'V' },
854                 { "regexp", no_argument, 0, 'r' },
855                 { "regex", no_argument, 0, EXTENDED_REGEX },
856                 { "wholename", no_argument, 0, 'w' },
857                 { "debug", no_argument, 0, 'D' },  // Not documented.
858                 // Enable to test cold-cache behavior (except for access()). Not documented.
859                 { "flush-cache", no_argument, 0, FLUSH_CACHE },
860                 // Mostly useful to dump out the entire database, even if the given directories
861                 // are gone. Disables sgid due to security. Not documented.
862                 { "ignore-visibility", no_argument, 0, IGNORE_VISIBILITY },
863                 { 0, 0, 0, 0 }
864         };
865
866         setlocale(LC_ALL, "");
867         for (;;) {
868                 int option_index = 0;
869                 int c = getopt_long(argc, argv, "Abcd:ehil:n:N0rwVD", long_options, &option_index);
870                 if (c == -1) {
871                         break;
872                 }
873                 switch (c) {
874                 case 'A':
875                         // Ignored.
876                         break;
877                 case 'b':
878                         match_basename = true;
879                         break;
880                 case 'c':
881                         only_count = true;
882                         break;
883                 case 'd':
884                         parse_dbpaths(optarg, &dbpaths);
885                         break;
886                 case 'e':
887                         check_existence = true;
888                         break;
889                 case 'h':
890                         usage();
891                         exit(0);
892                 case 'i':
893                         ignore_case = true;
894                         break;
895                 case 'l':
896                 case 'n':
897                         limit_matches = limit_left = atoll(optarg);
898                         if (limit_matches <= 0) {
899                                 fprintf(stderr, "Error: limit must be a strictly positive number.\n");
900                                 exit(1);
901                         }
902                         break;
903                 case 'N':
904                         literal_printing = true;
905                         break;
906                 case '0':
907                         print_nul = true;
908                         break;
909                 case 'r':
910                         patterns_are_regex = true;
911                         break;
912                 case EXTENDED_REGEX:
913                         patterns_are_regex = true;
914                         use_extended_regex = true;
915                         break;
916                 case 'w':
917                         match_basename = false;  // No-op unless -b is given first.
918                         break;
919                 case 'D':
920                         use_debug = true;
921                         break;
922                 case FLUSH_CACHE:
923                         flush_cache = true;
924                         break;
925                 case 'V':
926                         version();
927                         break;
928                 case IGNORE_VISIBILITY:
929                         ignore_visibility = true;
930                         break;
931                 default:
932                         exit(1);
933                 }
934         }
935
936         if (use_debug || flush_cache || ignore_visibility) {
937                 // Debug information would leak information about which files exist,
938                 // so drop setgid before we open the file; one would either need to run
939                 // as root, or use a locally-built file. Doing the same thing for
940                 // flush_cache is mostly paranoia, in an attempt to prevent random users
941                 // from making plocate slow for everyone else.
942                 // --ignore-visibility is obvious; if we allowed to keep sgid with
943                 // that flag on, it would subvert the entire security model.
944                 if (setgid(getgid()) != 0) {
945                         perror("setgid");
946                         exit(EXIT_FAILURE);
947                 }
948         }
949
950         if (!print_nul) {
951                 stdout_is_tty = isatty(1);
952         }
953
954         vector<Needle> needles;
955         for (int i = optind; i < argc; ++i) {
956                 Needle needle;
957                 needle.str = argv[i];
958
959                 // See if there are any wildcard characters, which indicates we should treat it
960                 // as an (anchored) glob.
961                 bool any_wildcard = false;
962                 for (size_t i = 0; i < needle.str.size(); i += read_unigram(needle.str, i).second) {
963                         if (read_unigram(needle.str, i).first == WILDCARD_UNIGRAM) {
964                                 any_wildcard = true;
965                                 break;
966                         }
967                 }
968
969                 if (patterns_are_regex) {
970                         needle.type = Needle::REGEX;
971                         needle.re = compile_regex(needle.str);
972                 } else if (any_wildcard) {
973                         needle.type = Needle::GLOB;
974                 } else if (ignore_case) {
975                         // strcasestr() doesn't handle locales correctly (even though LSB
976                         // claims it should), but somehow, fnmatch() does, and it's about
977                         // the same speed as using a regex.
978                         needle.type = Needle::GLOB;
979                         needle.str = "*" + needle.str + "*";
980                 } else {
981                         needle.type = Needle::STRSTR;
982                         needle.str = unescape_glob_to_plain_string(needle.str);
983                 }
984                 needles.push_back(move(needle));
985         }
986         if (needles.empty()) {
987                 fprintf(stderr, "plocate: no pattern to search for specified\n");
988                 exit(1);
989         }
990
991         if (dbpaths.empty()) {
992                 // No -d given, so use our default. Note that this happens
993                 // even if LOCATE_PATH exists, to match mlocate behavior.
994                 dbpaths.push_back(DBFILE);
995         }
996
997         const char *locate_path = getenv("LOCATE_PATH");
998         if (locate_path != nullptr) {
999                 parse_dbpaths(locate_path, &dbpaths);
1000         }
1001
1002         uint64_t matched = 0;
1003         for (size_t i = 0; i < dbpaths.size(); ++i) {
1004                 uint64_t this_matched;
1005                 if (i != dbpaths.size() - 1) {
1006                         this_matched = do_search_file_in_child(needles, dbpaths[i]);
1007                 } else {
1008                         this_matched = do_search_file(needles, dbpaths[i]);
1009                 }
1010                 matched += this_matched;
1011                 limit_left -= this_matched;
1012         }
1013         if (only_count) {
1014                 printf("%" PRId64 "\n", matched);
1015         }
1016
1017         return matched == 0;
1018 }