git.sesse.net Git - plocate/blob - needle.cpp
Proof-of-concept of using ICU for strength-zero searches.
[plocate] / needle.cpp
1 #include "needle.h"
2
3 #include "options.h"
4 #include "parse_trigrams.h"
5
6 #include <assert.h>
7 #include <fnmatch.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <utility>
13
14 #include <unicode/coll.h>
15 #include <unicode/stsearch.h>
16
17 using namespace std;
18
19 bool matches(const Needle &needle, const char *haystack)
20 {
21         UErrorCode status = U_ZERO_ERROR;
22         icu::UnicodeString target(haystack);  // fromUTF8?
23         icu::UnicodeString pattern(needle.str.c_str());
24         icu::Locale locale = icu::Locale::createCanonical(setlocale(LC_CTYPE, NULL));
25         icu::StringSearch search(pattern, target, locale, nullptr, status);
26         search.getCollator()->setStrength(icu::Collator::PRIMARY);
27         //search.setStrength(icu::Collator::PRIMARY);
28
29         int pos = search.first(status);
30         if (U_FAILURE(status)) {
31                 fprintf(stderr, "Could not create a StringSearch object.\n");
32                 exit(1);
33         }
34         return pos != USEARCH_DONE;
35
36 //      if (needle.type == Needle::STRSTR) {
37 //              return strstr(haystack, needle.str.c_str()) != nullptr;
38 //      } else if (needle.type == Needle::GLOB) {
39 //              int flags = ignore_case ? FNM_CASEFOLD : 0;
40 //              return fnmatch(needle.str.c_str(), haystack, flags) == 0;
41 //      } else {
42 //              assert(needle.type == Needle::REGEX);
43 //              return regexec(&needle.re, haystack, /*nmatch=*/0, /*pmatch=*/nullptr, /*flags=*/0) == 0;
44 //      }
45 }
46
47 string unescape_glob_to_plain_string(const string &needle)
48 {
49         string unescaped;
50         for (size_t i = 0; i < needle.size(); i += read_unigram(needle, i).second) {
51                 uint32_t ch = read_unigram(needle, i).first;
52                 assert(ch != WILDCARD_UNIGRAM);
53                 if (ch == PREMATURE_END_UNIGRAM) {
54                         fprintf(stderr, "Pattern '%s' ended prematurely\n", needle.c_str());
55                         exit(1);
56                 }
57                 unescaped.push_back(ch);
58         }
59         return unescaped;
60 }
61
62 regex_t compile_regex(const string &needle)
63 {
64         regex_t re;
65         int flags = REG_NOSUB;
66         if (ignore_case) {
67                 flags |= REG_ICASE;
68         }
69         if (use_extended_regex) {
70                 flags |= REG_EXTENDED;
71         }
72         int err = regcomp(&re, needle.c_str(), flags);
73         if (err != 0) {
74                 char errbuf[256];
75                 regerror(err, &re, errbuf, sizeof(errbuf));
76                 fprintf(stderr, "Error when compiling regex '%s': %s\n", needle.c_str(), errbuf);
77                 exit(1);
78         }
79         return re;
80 }