From 1dd9495198bb877e0d5d332def4d3b218fc353ba Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 10 Oct 2020 10:33:36 +0200 Subject: [PATCH] Remove the double filtering of too large posting lists; we would not even start I/O for them anyway, so there is less to save than was assumed. --- plocate.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/plocate.cpp b/plocate.cpp index b095bb6..a908d30 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -370,13 +370,12 @@ void do_search_file(const vector &needles, const char *filename) dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration(steady_clock::now() - start).count()); vector> trigrams; - uint64_t shortest_so_far = numeric_limits::max(); for (const string &needle : needles) { if (needle.size() < 3) continue; for (size_t i = 0; i < needle.size() - 2; ++i) { uint32_t trgm = read_trigram(needle, i); - corpus.find_trigram(trgm, [trgm, &trigrams, &shortest_so_far](const Trigram *trgmptr, size_t len) { + corpus.find_trigram(trgm, [trgm, &trigrams](const Trigram *trgmptr, size_t len) { if (trgmptr == nullptr) { dprintf("trigram %s isn't found, we abort the search\n", print_trigram(trgm).c_str()); if (only_count) { @@ -384,13 +383,7 @@ void do_search_file(const vector &needles, const char *filename) } exit(0); } - if (trgmptr->num_docids > shortest_so_far * 100) { - dprintf("not loading trigram %s with %u docids, it would be ignored later anyway\n", - print_trigram(trgm).c_str(), trgmptr->num_docids); - } else { - trigrams.emplace_back(*trgmptr, len); - shortest_so_far = std::min(shortest_so_far, trgmptr->num_docids); - } + trigrams.emplace_back(*trgmptr, len); }); } } -- 2.39.2