--- /dev/null
+#include "access_rx_cache.h"
+
+#include "io_uring_engine.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <utility>
+
+using namespace std;
+
+void AccessRXCache::check_access(const char *filename, bool allow_async, function<void(bool)> cb)
+{
+ lock_guard<mutex> lock(mu);
+ if (engine == nullptr || !engine->get_supports_stat()) {
+ allow_async = false;
+ }
+
+ for (const char *end = strchr(filename + 1, '/'); end != nullptr; end = strchr(end + 1, '/')) {
+ string parent_path(filename, end - filename); // string_view from C++20.
+ auto cache_it = cache.find(parent_path);
+ if (cache_it != cache.end()) {
+ // Found in the cache.
+ if (!cache_it->second) {
+ cb(false);
+ return;
+ }
+ continue;
+ }
+
+ if (!allow_async) {
+ bool ok = access(parent_path.c_str(), R_OK | X_OK) == 0;
+ cache.emplace(parent_path, ok);
+ if (!ok) {
+ cb(false);
+ return;
+ }
+ continue;
+ }
+
+ // We want to call access(), but it could block on I/O. io_uring doesn't support
+ // access(), but we can do a dummy asynchonous statx() to populate the kernel's cache,
+ // which nearly always makes the next access() instantaneous.
+
+ // See if there's already a pending stat that matches this,
+ // or is a subdirectory.
+ auto it = pending_stats.lower_bound(parent_path);
+ if (it != pending_stats.end() && it->first.size() >= parent_path.size() &&
+ it->first.compare(0, parent_path.size(), parent_path) == 0) {
+ it->second.emplace_back(PendingStat{ filename, move(cb) });
+ } else {
+ it = pending_stats.emplace(filename, vector<PendingStat>{}).first;
+ engine->submit_stat(filename, [this, it, filename{ strdup(filename) }, cb{ move(cb) }] {
+ // The stat returned, so now do the actual access() calls.
+ // All of them should be in cache, so don't fire off new statx()
+ // calls during that check.
+ check_access(filename, /*allow_async=*/false, move(cb));
+ free(filename);
+
+ // Call all others that waited for the same stat() to finish.
+ // They may fire off new stat() calls if needed.
+ vector<PendingStat> pending = move(it->second);
+ pending_stats.erase(it);
+ for (PendingStat &ps : pending) {
+ check_access(ps.filename.c_str(), /*allow_async=*/true, move(ps.cb));
+ }
+ });
+ }
+ return; // The rest will happen in async context.
+ }
+
+ // Passed all checks.
+ cb(true);
+}
--- /dev/null
+#ifndef _ACCESS_RX_CACHE_H
+#define _ACCESS_RX_CACHE_H 1
+
+#include <functional>
+#include <map>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+class IOUringEngine;
+
+class AccessRXCache {
+public:
+ AccessRXCache(IOUringEngine *engine)
+ : engine(engine) {}
+ void check_access(const char *filename, bool allow_async, std::function<void(bool)> cb);
+
+private:
+ std::unordered_map<std::string, bool> cache;
+ struct PendingStat {
+ std::string filename;
+ std::function<void(bool)> cb;
+ };
+ std::map<std::string, std::vector<PendingStat>> pending_stats;
+ IOUringEngine *engine;
+ std::mutex mu;
+};
+
+#endif // !defined(_ACCESS_RX_CACHE_H)
add_project_arguments('-DWITHOUT_URING', language: 'cpp')
endif
-executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp'],
+executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp', 'access_rx_cache.cpp'],
dependencies: [uringdep, zstddep, threaddep],
install: true,
install_mode: ['rwxr-sr-x', 'root', 'mlocate'])
+#include "access_rx_cache.h"
#include "db.h"
#include "dprintf.h"
#include "io_uring_engine.h"
#include <iterator>
#include <limits>
#include <locale.h>
-#include <map>
#include <memory>
#include <mutex>
#include <regex.h>
}
}
-class AccessRXCache {
-public:
- AccessRXCache(IOUringEngine *engine)
- : engine(engine) {}
- void check_access(const char *filename, bool allow_async, function<void(bool)> cb);
-
-private:
- unordered_map<string, bool> cache;
- struct PendingStat {
- string filename;
- function<void(bool)> cb;
- };
- map<string, vector<PendingStat>> pending_stats;
- IOUringEngine *engine;
- mutex mu;
-};
-
-void AccessRXCache::check_access(const char *filename, bool allow_async, function<void(bool)> cb)
-{
- lock_guard<mutex> lock(mu);
- if (engine == nullptr || !engine->get_supports_stat()) {
- allow_async = false;
- }
-
- for (const char *end = strchr(filename + 1, '/'); end != nullptr; end = strchr(end + 1, '/')) {
- string parent_path(filename, end - filename); // string_view from C++20.
- auto cache_it = cache.find(parent_path);
- if (cache_it != cache.end()) {
- // Found in the cache.
- if (!cache_it->second) {
- cb(false);
- return;
- }
- continue;
- }
-
- if (!allow_async) {
- bool ok = access(parent_path.c_str(), R_OK | X_OK) == 0;
- cache.emplace(parent_path, ok);
- if (!ok) {
- cb(false);
- return;
- }
- continue;
- }
-
- // We want to call access(), but it could block on I/O. io_uring doesn't support
- // access(), but we can do a dummy asynchonous statx() to populate the kernel's cache,
- // which nearly always makes the next access() instantaneous.
-
- // See if there's already a pending stat that matches this,
- // or is a subdirectory.
- auto it = pending_stats.lower_bound(parent_path);
- if (it != pending_stats.end() && it->first.size() >= parent_path.size() &&
- it->first.compare(0, parent_path.size(), parent_path) == 0) {
- it->second.emplace_back(PendingStat{ filename, move(cb) });
- } else {
- it = pending_stats.emplace(filename, vector<PendingStat>{}).first;
- engine->submit_stat(filename, [this, it, filename{ strdup(filename) }, cb{ move(cb) }] {
- // The stat returned, so now do the actual access() calls.
- // All of them should be in cache, so don't fire off new statx()
- // calls during that check.
- check_access(filename, /*allow_async=*/false, move(cb));
- free(filename);
-
- // Call all others that waited for the same stat() to finish.
- // They may fire off new stat() calls if needed.
- vector<PendingStat> pending = move(it->second);
- pending_stats.erase(it);
- for (PendingStat &ps : pending) {
- check_access(ps.filename.c_str(), /*allow_async=*/true, move(ps.cb));
- }
- });
- }
- return; // The rest will happen in async context.
- }
-
- // Passed all checks.
- cb(true);
-}
-
class Corpus {
public:
Corpus(int fd, IOUringEngine *engine);