From d5ba26d705460a7e37213eeb4954b2efed8bebf0 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 16 Oct 2020 00:36:20 +0200 Subject: [PATCH] Move AccessRXCache into its own file. --- access_rx_cache.cpp | 74 ++++++++++++++++++++++++++++++++++++++++ access_rx_cache.h | 30 ++++++++++++++++ meson.build | 2 +- plocate.cpp | 83 +-------------------------------------------- 4 files changed, 106 insertions(+), 83 deletions(-) create mode 100644 access_rx_cache.cpp create mode 100644 access_rx_cache.h diff --git a/access_rx_cache.cpp b/access_rx_cache.cpp new file mode 100644 index 0000000..c6f5634 --- /dev/null +++ b/access_rx_cache.cpp @@ -0,0 +1,74 @@ +#include "access_rx_cache.h" + +#include "io_uring_engine.h" + +#include +#include +#include +#include + +using namespace std; + +void AccessRXCache::check_access(const char *filename, bool allow_async, function cb) +{ + lock_guard lock(mu); + if (engine == nullptr || !engine->get_supports_stat()) { + allow_async = false; + } + + for (const char *end = strchr(filename + 1, '/'); end != nullptr; end = strchr(end + 1, '/')) { + string parent_path(filename, end - filename); // string_view from C++20. + auto cache_it = cache.find(parent_path); + if (cache_it != cache.end()) { + // Found in the cache. + if (!cache_it->second) { + cb(false); + return; + } + continue; + } + + if (!allow_async) { + bool ok = access(parent_path.c_str(), R_OK | X_OK) == 0; + cache.emplace(parent_path, ok); + if (!ok) { + cb(false); + return; + } + continue; + } + + // We want to call access(), but it could block on I/O. io_uring doesn't support + // access(), but we can do a dummy asynchonous statx() to populate the kernel's cache, + // which nearly always makes the next access() instantaneous. + + // See if there's already a pending stat that matches this, + // or is a subdirectory. + auto it = pending_stats.lower_bound(parent_path); + if (it != pending_stats.end() && it->first.size() >= parent_path.size() && + it->first.compare(0, parent_path.size(), parent_path) == 0) { + it->second.emplace_back(PendingStat{ filename, move(cb) }); + } else { + it = pending_stats.emplace(filename, vector{}).first; + engine->submit_stat(filename, [this, it, filename{ strdup(filename) }, cb{ move(cb) }] { + // The stat returned, so now do the actual access() calls. + // All of them should be in cache, so don't fire off new statx() + // calls during that check. + check_access(filename, /*allow_async=*/false, move(cb)); + free(filename); + + // Call all others that waited for the same stat() to finish. + // They may fire off new stat() calls if needed. + vector pending = move(it->second); + pending_stats.erase(it); + for (PendingStat &ps : pending) { + check_access(ps.filename.c_str(), /*allow_async=*/true, move(ps.cb)); + } + }); + } + return; // The rest will happen in async context. + } + + // Passed all checks. + cb(true); +} diff --git a/access_rx_cache.h b/access_rx_cache.h new file mode 100644 index 0000000..8757b94 --- /dev/null +++ b/access_rx_cache.h @@ -0,0 +1,30 @@ +#ifndef _ACCESS_RX_CACHE_H +#define _ACCESS_RX_CACHE_H 1 + +#include +#include +#include +#include +#include +#include + +class IOUringEngine; + +class AccessRXCache { +public: + AccessRXCache(IOUringEngine *engine) + : engine(engine) {} + void check_access(const char *filename, bool allow_async, std::function cb); + +private: + std::unordered_map cache; + struct PendingStat { + std::string filename; + std::function cb; + }; + std::map> pending_stats; + IOUringEngine *engine; + std::mutex mu; +}; + +#endif // !defined(_ACCESS_RX_CACHE_H) diff --git a/meson.build b/meson.build index 7c6755b..d6d0e99 100644 --- a/meson.build +++ b/meson.build @@ -12,7 +12,7 @@ if not uringdep.found() add_project_arguments('-DWITHOUT_URING', language: 'cpp') endif -executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp'], +executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp', 'access_rx_cache.cpp'], dependencies: [uringdep, zstddep, threaddep], install: true, install_mode: ['rwxr-sr-x', 'root', 'mlocate']) diff --git a/plocate.cpp b/plocate.cpp index c270e79..0d59076 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -1,3 +1,4 @@ +#include "access_rx_cache.h" #include "db.h" #include "dprintf.h" #include "io_uring_engine.h" @@ -20,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -80,87 +80,6 @@ bool matches(const Needle &needle, const char *haystack) } } -class AccessRXCache { -public: - AccessRXCache(IOUringEngine *engine) - : engine(engine) {} - void check_access(const char *filename, bool allow_async, function cb); - -private: - unordered_map cache; - struct PendingStat { - string filename; - function cb; - }; - map> pending_stats; - IOUringEngine *engine; - mutex mu; -}; - -void AccessRXCache::check_access(const char *filename, bool allow_async, function cb) -{ - lock_guard lock(mu); - if (engine == nullptr || !engine->get_supports_stat()) { - allow_async = false; - } - - for (const char *end = strchr(filename + 1, '/'); end != nullptr; end = strchr(end + 1, '/')) { - string parent_path(filename, end - filename); // string_view from C++20. - auto cache_it = cache.find(parent_path); - if (cache_it != cache.end()) { - // Found in the cache. - if (!cache_it->second) { - cb(false); - return; - } - continue; - } - - if (!allow_async) { - bool ok = access(parent_path.c_str(), R_OK | X_OK) == 0; - cache.emplace(parent_path, ok); - if (!ok) { - cb(false); - return; - } - continue; - } - - // We want to call access(), but it could block on I/O. io_uring doesn't support - // access(), but we can do a dummy asynchonous statx() to populate the kernel's cache, - // which nearly always makes the next access() instantaneous. - - // See if there's already a pending stat that matches this, - // or is a subdirectory. - auto it = pending_stats.lower_bound(parent_path); - if (it != pending_stats.end() && it->first.size() >= parent_path.size() && - it->first.compare(0, parent_path.size(), parent_path) == 0) { - it->second.emplace_back(PendingStat{ filename, move(cb) }); - } else { - it = pending_stats.emplace(filename, vector{}).first; - engine->submit_stat(filename, [this, it, filename{ strdup(filename) }, cb{ move(cb) }] { - // The stat returned, so now do the actual access() calls. - // All of them should be in cache, so don't fire off new statx() - // calls during that check. - check_access(filename, /*allow_async=*/false, move(cb)); - free(filename); - - // Call all others that waited for the same stat() to finish. - // They may fire off new stat() calls if needed. - vector pending = move(it->second); - pending_stats.erase(it); - for (PendingStat &ps : pending) { - check_access(ps.filename.c_str(), /*allow_async=*/true, move(ps.cb)); - } - }); - } - return; // The rest will happen in async context. - } - - // Passed all checks. - cb(true); -} - class Corpus { public: Corpus(int fd, IOUringEngine *engine); -- 2.39.2