From 6f7838012a445183eeaba4dde44d4b2b1617aa6b Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 1 Oct 2020 10:16:31 +0200 Subject: [PATCH] Loosen up serialization to be about printing only. This allows us to decompress, match and check access() for docids that arrive out-of-order, instead of serializing their processing. Especially the access() checking is good to have overlapping other I/O if possible, even though it's synchronous. If we ever get async access(), we'll need to rework Serializer again, probably. --- plocate.cpp | 56 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/plocate.cpp b/plocate.cpp index a50a33a..8ba5aa8 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -29,13 +30,15 @@ bool print_nul = false; class Serializer { public: - void do_or_wait(int seq, function cb); + bool ready_to_print(int seq) { return next_seq == seq; } + void print_delayed(int seq, const vector msg); + void release_current(); private: int next_seq = 0; struct Element { int seq; - function cb; + vector msg; bool operator<(const Element &other) const { @@ -45,18 +48,24 @@ private: priority_queue pending; }; -void Serializer::do_or_wait(int seq, function cb) +void Serializer::print_delayed(int seq, const vector msg) { - if (seq != next_seq) { - pending.emplace(Element{ seq, move(cb) }); - return; - } + pending.push(Element{seq, move(msg)}); +} - cb(); +void Serializer::release_current() +{ ++next_seq; + // See if any delayed prints can now be dealt with. while (!pending.empty() && pending.top().seq == next_seq) { - pending.top().cb(); + for (const string &msg : pending.top().msg) { + if (print_nul) { + printf("%s%c", msg.c_str(), 0); + } else { + printf("%s\n", msg.c_str()); + } + } pending.pop(); ++next_seq; } @@ -188,7 +197,8 @@ size_t Corpus::get_num_filename_blocks() const } size_t scan_file_block(const vector &needles, string_view compressed, - unordered_map *access_rx_cache) + unordered_map *access_rx_cache, int seq, + Serializer *serializer) { size_t matched = 0; @@ -209,6 +219,9 @@ size_t scan_file_block(const vector &needles, string_view compressed, } block[block.size() - 1] = '\0'; + bool immediate_print = (serializer == nullptr || serializer->ready_to_print(seq)); + vector delayed; + for (const char *filename = block.data(); filename != block.data() + block.size(); filename += strlen(filename) + 1) { @@ -221,13 +234,24 @@ size_t scan_file_block(const vector &needles, string_view compressed, } if (found && has_access(filename, access_rx_cache)) { ++matched; - if (print_nul) { - printf("%s%c", filename, 0); + if (immediate_print) { + if (print_nul) { + printf("%s%c", filename, 0); + } else { + printf("%s\n", filename); + } } else { - printf("%s\n", filename); + delayed.push_back(filename); } } } + if (serializer != nullptr) { + if (immediate_print) { + serializer->release_current(); + } else { + serializer->print_delayed(seq, move(delayed)); + } + } return matched; } @@ -239,9 +263,7 @@ size_t scan_docids(const vector &needles, const vector &docids for (size_t i = 0; i < docids.size(); ++i) { uint32_t docid = docids[i]; corpus.get_compressed_filename_block(docid, [i, &matched, &needles, &access_rx_cache, &docids_in_order](string compressed) { - docids_in_order.do_or_wait(i, [&matched, &needles, compressed{ move(compressed) }, &access_rx_cache] { - matched += scan_file_block(needles, compressed, &access_rx_cache); - }); + matched += scan_file_block(needles, compressed, &access_rx_cache, i, &docids_in_order); }); } engine->finish(); @@ -269,7 +291,7 @@ void scan_all_docids(const vector &needles, int fd, const Corpus &corpus for (uint32_t docid = io_docid; docid < last_docid; ++docid) { size_t relative_offset = offsets[docid] - offsets[io_docid]; size_t len = offsets[docid + 1] - offsets[docid]; - scan_file_block(needles, { &compressed[relative_offset], len }, &access_rx_cache); + scan_file_block(needles, { &compressed[relative_offset], len }, &access_rx_cache, 0, nullptr); } } } -- 2.39.2