From 601e6fef22bc018be1ca3b19215e54a2c2edcb9d Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 16 Oct 2020 00:42:25 +0200 Subject: [PATCH] Move several needle/searching related functions into its own file. --- meson.build | 2 +- needle.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++ needle.h | 19 ++++++++++++++++ plocate.cpp | 60 +-------------------------------------------------- 4 files changed, 83 insertions(+), 60 deletions(-) create mode 100644 needle.cpp create mode 100644 needle.h diff --git a/meson.build b/meson.build index d6d0e99..913fc0f 100644 --- a/meson.build +++ b/meson.build @@ -12,7 +12,7 @@ if not uringdep.found() add_project_arguments('-DWITHOUT_URING', language: 'cpp') endif -executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp', 'access_rx_cache.cpp'], +executable('plocate', ['plocate.cpp', 'io_uring_engine.cpp', 'turbopfor.cpp', 'parse_trigrams.cpp', 'serializer.cpp', 'access_rx_cache.cpp', 'needle.cpp'], dependencies: [uringdep, zstddep, threaddep], install: true, install_mode: ['rwxr-sr-x', 'root', 'mlocate']) diff --git a/needle.cpp b/needle.cpp new file mode 100644 index 0000000..60f1698 --- /dev/null +++ b/needle.cpp @@ -0,0 +1,62 @@ +#include "needle.h" + +#include "options.h" +#include "parse_trigrams.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +bool matches(const Needle &needle, const char *haystack) +{ + if (needle.type == Needle::STRSTR) { + return strstr(haystack, needle.str.c_str()) != nullptr; + } else if (needle.type == Needle::GLOB) { + int flags = ignore_case ? FNM_CASEFOLD : 0; + return fnmatch(needle.str.c_str(), haystack, flags) == 0; + } else { + assert(needle.type == Needle::REGEX); + return regexec(&needle.re, haystack, /*nmatch=*/0, /*pmatch=*/nullptr, /*flags=*/0) == 0; + } +} + +string unescape_glob_to_plain_string(const string &needle) +{ + string unescaped; + for (size_t i = 0; i < needle.size(); i += read_unigram(needle, i).second) { + uint32_t ch = read_unigram(needle, i).first; + assert(ch != WILDCARD_UNIGRAM); + if (ch == PREMATURE_END_UNIGRAM) { + fprintf(stderr, "Pattern '%s' ended prematurely\n", needle.c_str()); + exit(1); + } + unescaped.push_back(ch); + } + return unescaped; +} + +regex_t compile_regex(const string &needle) +{ + regex_t re; + int flags = REG_NOSUB; + if (ignore_case) { + flags |= REG_ICASE; + } + if (use_extended_regex) { + flags |= REG_EXTENDED; + } + int err = regcomp(&re, needle.c_str(), flags); + if (err != 0) { + char errbuf[256]; + regerror(err, &re, errbuf, sizeof(errbuf)); + fprintf(stderr, "Error when compiling regex '%s': %s\n", needle.c_str(), errbuf); + exit(1); + } + return re; +} diff --git a/needle.h b/needle.h new file mode 100644 index 0000000..ce443f2 --- /dev/null +++ b/needle.h @@ -0,0 +1,19 @@ +#ifndef _NEEDLE_H +#define _NEEDLE_H 1 + +#include +#include + +struct Needle { + enum { STRSTR, + REGEX, + GLOB } type; + std::string str; // Filled in no matter what. + regex_t re; // For REGEX. +}; + +bool matches(const Needle &needle, const char *haystack); +std::string unescape_glob_to_plain_string(const std::string &needle); +regex_t compile_regex(const std::string &needle); + +#endif // !defined(_NEEDLE_H) diff --git a/plocate.cpp b/plocate.cpp index 0d59076..4141a31 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -2,6 +2,7 @@ #include "db.h" #include "dprintf.h" #include "io_uring_engine.h" +#include "needle.h" #include "parse_trigrams.h" #include "serializer.h" #include "turbopfor.h" @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -57,29 +57,6 @@ int64_t limit_left = numeric_limits::max(); steady_clock::time_point start; ZSTD_DDict *ddict = nullptr; -regex_t compile_regex(const string &needle); - -struct Needle { - enum { STRSTR, - REGEX, - GLOB } type; - string str; // Filled in no matter what. - regex_t re; // For REGEX. -}; - -bool matches(const Needle &needle, const char *haystack) -{ - if (needle.type == Needle::STRSTR) { - return strstr(haystack, needle.str.c_str()) != nullptr; - } else if (needle.type == Needle::GLOB) { - int flags = ignore_case ? FNM_CASEFOLD : 0; - return fnmatch(needle.str.c_str(), haystack, flags) == 0; - } else { - assert(needle.type == Needle::REGEX); - return regexec(&needle.re, haystack, /*nmatch=*/0, /*pmatch=*/nullptr, /*flags=*/0) == 0; - } -} - class Corpus { public: Corpus(int fd, IOUringEngine *engine); @@ -656,41 +633,6 @@ void do_search_file(const vector &needles, const char *filename) } } -string unescape_glob_to_plain_string(const string &needle) -{ - string unescaped; - for (size_t i = 0; i < needle.size(); i += read_unigram(needle, i).second) { - uint32_t ch = read_unigram(needle, i).first; - assert(ch != WILDCARD_UNIGRAM); - if (ch == PREMATURE_END_UNIGRAM) { - fprintf(stderr, "Pattern '%s' ended prematurely\n", needle.c_str()); - exit(1); - } - unescaped.push_back(ch); - } - return unescaped; -} - -regex_t compile_regex(const string &needle) -{ - regex_t re; - int flags = REG_NOSUB; - if (ignore_case) { - flags |= REG_ICASE; - } - if (use_extended_regex) { - flags |= REG_EXTENDED; - } - int err = regcomp(&re, needle.c_str(), flags); - if (err != 0) { - char errbuf[256]; - regerror(err, &re, errbuf, sizeof(errbuf)); - fprintf(stderr, "Error when compiling regex '%s': %s\n", needle.c_str(), errbuf); - exit(1); - } - return re; -} - void usage() { printf( -- 2.39.2