git.sesse.net Git - plocate/blob - serializer.cpp
Fix an infinite loop when encountering invalid UTF-8 in file names.
[plocate] / serializer.cpp
1 #include "serializer.h"
2
3 #include "dprintf.h"
4
5 #include <chrono>
6 #include <inttypes.h>
7 #include <memory>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <utility>
11
12 using namespace std;
13 using namespace std::chrono;
14
15 extern steady_clock::time_point start;
16
17 void apply_limit()
18 {
19         if (--limit_left > 0) {
20                 return;
21         }
22         dprintf("Done in %.1f ms, found %" PRId64 " matches.\n",
23                 1e3 * duration<float>(steady_clock::now() - start).count(), limit_matches);
24         if (only_count) {
25                 printf("%" PRId64 "\n", limit_matches);
26         }
27         exit(0);
28 }
29
30 void print_possibly_escaped(const string &str)
31 {
32         if (print_nul) {
33                 printf("%s%c", str.c_str(), 0);
34                 return;
35         } else if (!stdout_is_tty) {
36                 printf("%s\n", str.c_str());
37                 return;
38         }
39
40         // stdout is a terminal, so we should protect the user against
41         // escapes, stray newlines and the likes. First of all, check if
42         // all the characters are safe; we consider everything safe that
43         // isn't a control character, ', " or \. People could make
44         // filenames like "$(rm -rf)", but that's out-of-scope.
45         const char *ptr = str.data();
46         size_t len = str.size();
47
48         mbtowc(nullptr, 0, 0);
49         wchar_t pwc;
50         bool all_safe = true;
51         do {
52                 int ret = mbtowc(&pwc, ptr, len);
53                 if (ret == -1) {
54                         all_safe = false;  // Malformed data.
55                 } else if (ret == 0) {
56                         break;  // EOF.
57                 } else if (pwc < 32 || pwc == '\'' || pwc == '"' || pwc == '\\') {
58                         all_safe = false;
59                 } else {
60                         ptr += ret;
61                         len -= ret;
62                 }
63         } while (all_safe && *ptr != '\0');
64
65         if (all_safe) {
66                 printf("%s\n", str.c_str());
67                 return;
68         }
69
70         // Print escaped, but in such a way that the user can easily take the
71         // escaped output and paste into the shell. We print much like GNU ls does,
72         // ie., using the shell $'foo' construct whenever we need to print something
73         // escaped.
74         bool in_escaped_mode = false;
75         printf("'");
76
77         mbtowc(nullptr, 0, 0);
78         ptr = str.data();
79         len = str.size();
80         while (*ptr != '\0') {
81                 int ret = mbtowc(nullptr, ptr, len);
82                 if (ret == -1) {
83                         // Malformed data.
84                         printf("?");
85                         ++ptr;
86                         --len;
87                         continue;
88                 } else if (ret == 0) {
89                         break;  // EOF.
90                 }
91                 if (*ptr < 32 || *ptr == '\'' || *ptr == '"' || *ptr == '\\') {
92                         if (!in_escaped_mode) {
93                                 printf("'$'");
94                                 in_escaped_mode = true;
95                         }
96
97                         // The list of allowed escapes is from bash(1).
98                         switch (*ptr) {
99                         case '\a':
100                                 printf("\\a");
101                                 break;
102                         case '\b':
103                                 printf("\\b");
104                                 break;
105                         case '\f':
106                                 printf("\\f");
107                                 break;
108                         case '\n':
109                                 printf("\\n");
110                                 break;
111                         case '\r':
112                                 printf("\\r");
113                                 break;
114                         case '\t':
115                                 printf("\\t");
116                                 break;
117                         case '\v':
118                                 printf("\\v");
119                                 break;
120                         case '\\':
121                                 printf("\\\\");
122                                 break;
123                         case '\'':
124                                 printf("\\'");
125                                 break;
126                         case '"':
127                                 printf("\\\"");
128                                 break;
129                         default:
130                                 printf("\\%03o", *ptr);
131                                 break;
132                         }
133                 } else {
134                         if (in_escaped_mode) {
135                                 printf("''");
136                                 in_escaped_mode = false;
137                         }
138                         fwrite(ptr, ret, 1, stdout);
139                 }
140                 ptr += ret;
141                 len -= ret;
142         }
143         printf("'\n");
144 }
145
146 void Serializer::print(uint64_t seq, uint64_t skip, const string msg)
147 {
148         if (only_count) {
149                 if (!msg.empty()) {
150                         apply_limit();
151                 }
152                 return;
153         }
154
155         if (next_seq != seq) {
156                 pending.push(Element{ seq, skip, move(msg) });
157                 return;
158         }
159
160         if (!msg.empty()) {
161                 print_possibly_escaped(msg);
162                 apply_limit();
163         }
164         next_seq += skip;
165
166         // See if any delayed prints can now be dealt with.
167         while (!pending.empty() && pending.top().seq == next_seq) {
168                 if (!pending.top().msg.empty()) {
169                         print_possibly_escaped(pending.top().msg);
170                         apply_limit();
171                 }
172                 next_seq += pending.top().skip;
173                 pending.pop();
174         }
175 }