From: Steinar H. Gunderson Date: Sat, 10 Oct 2020 08:33:36 +0000 (+0200) Subject: Remove the double filtering of too large posting lists; we would not even start I... X-Git-Tag: 1.0.0~20 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=1dd9495198bb877e0d5d332def4d3b218fc353ba;p=plocate Remove the double filtering of too large posting lists; we would not even start I/O for it anyway, so there is less to save than was assumed. --- diff --git a/plocate.cpp b/plocate.cpp index b095bb6..a908d30 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -370,13 +370,12 @@ void do_search_file(const vector<string> &needles, const char *filename) dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count()); vector<pair<Trigram, size_t>> trigrams; - uint64_t shortest_so_far = numeric_limits<uint64_t>::max(); for (const string &needle : needles) { if (needle.size() < 3) continue; for (size_t i = 0; i < needle.size() - 2; ++i) { uint32_t trgm = read_trigram(needle, i); - corpus.find_trigram(trgm, [trgm, &trigrams, &shortest_so_far](const Trigram *trgmptr, size_t len) { + corpus.find_trigram(trgm, [trgm, &trigrams](const Trigram *trgmptr, size_t len) { if (trgmptr == nullptr) { dprintf("trigram %s isn't found, we abort the search\n", print_trigram(trgm).c_str()); if (only_count) { @@ -384,13 +383,7 @@ void do_search_file(const vector<string> &needles, const char *filename) } exit(0); } - if (trgmptr->num_docids > shortest_so_far * 100) { - dprintf("not loading trigram %s with %u docids, it would be ignored later anyway\n", - print_trigram(trgm).c_str(), trgmptr->num_docids); - } else { - trigrams.emplace_back(*trgmptr, len); - shortest_so_far = std::min<uint64_t>(shortest_so_far, trgmptr->num_docids); - } + trigrams.emplace_back(*trgmptr, len); }); } }