From e3a362166c8012ebc689b437b5ead928c11fbe3e Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Mon, 28 Sep 2020 23:52:46 +0200
Subject: [PATCH] Refactor scanning through a filename block into its own function.

---
 plocate.cpp | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/plocate.cpp b/plocate.cpp
index 5f5ca4e..390bd21 100644
--- a/plocate.cpp
+++ b/plocate.cpp
@@ -76,6 +76,32 @@ struct Trigram {
 	uint64_t offset;
 };
 
+size_t scan_docid(const string &needle, uint32_t docid, const char *data, const uint64_t *filename_offsets, unordered_map *access_rx_cache)
+{
+	const char *compressed = (const char *)(data + filename_offsets[docid]);
+	size_t compressed_size = filename_offsets[docid + 1] - filename_offsets[docid]; // Allowed we have a sentinel block at the end.
+	size_t matched = 0;
+
+	string block;
+	block.resize(ZSTD_getFrameContentSize(compressed, compressed_size) + 1);
+
+	ZSTD_decompress(&block[0], block.size(), compressed, compressed_size);
+	block[block.size() - 1] = '\0';
+
+	for (const char *filename = block.data();
+	     filename != block.data() + block.size();
+	     filename += strlen(filename) + 1) {
+		if (strstr(filename, needle.c_str()) == nullptr) {
+			continue;
+		}
+		if (has_access(filename, access_rx_cache)) {
+			++matched;
+			printf("%s\n", filename);
+		}
+	}
+	return matched;
+}
+
 void do_search_file(const string &needle, const char *filename)
 {
 	int fd = open(filename, O_RDONLY);
@@ -177,26 +203,7 @@ void do_search_file(const string &needle, const char *filename)
 	const uint64_t *filename_offsets = (const uint64_t *)(data + filename_index_offset);
 	int matched = 0;
 	for (uint32_t docid : in1) {
-		const char *compressed = (const char *)(data + filename_offsets[docid]);
-		size_t compressed_size = filename_offsets[docid + 1] - filename_offsets[docid]; // Allowed we have a sentinel block at the end.
-
-		string block;
-		block.resize(ZSTD_getFrameContentSize(compressed, compressed_size) + 1);
-
-		ZSTD_decompress(&block[0], block.size(), compressed, compressed_size);
-		block[block.size() - 1] = '\0';
-
-		for (const char *filename = block.data();
-		     filename != block.data() + block.size();
-		     filename += strlen(filename) + 1) {
-			if (strstr(filename, needle.c_str()) == nullptr) {
-				continue;
-			}
-			if (has_access(filename, &access_rx_cache)) {
-				++matched;
-				printf("%s\n", filename);
-			}
-		}
+		matched += scan_docid(needle, docid, data, filename_offsets, &access_rx_cache);
 	}
 	end = steady_clock::now();
 	dprintf("Done in %.1f ms, found %d matches.\n",
-- 
2.39.2