From cbecd483f57c465b2ad6d3867c760c2e5b5e79aa Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 29 Oct 2020 23:42:01 +0100 Subject: [PATCH] Escape unprintable characters when outputting filenames to a terminal. Filenames are generally untrusted, and can contain any kind of cruft. In particular, there have been terminals (hopefully not in wide use anymore!) that will do insanity like running specific commands when seeing a specific escape sequence. More prosaically, embedded newlines can make for confusing output. Thus, escape any nonprintable characters in a shell-parseable way, much the same way GNU ls does these days. Also escape quotes, backslashes and the like to make sure nothing unescaped looks like it's escaped. This doesn't mean it's safe to take whatever and parse it uncritically (we don't escape $, for instance), but it's generally good enough. Escaping is disabled when doing zero-terminated output, or when printing to a pipe or file. --- options.h | 1 + plocate.cpp | 5 ++ serializer.cpp | 127 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 123 insertions(+), 10 deletions(-) diff --git a/options.h b/options.h index 68f712e..5dc5ad4 100644 --- a/options.h +++ b/options.h @@ -13,5 +13,6 @@ extern bool patterns_are_regex; extern bool use_extended_regex; extern int64_t limit_matches; extern int64_t limit_left; // Not strictly an option. +extern bool stdout_is_tty; // Same. 
#endif // !defined(_OPTIONS_H) diff --git a/plocate.cpp b/plocate.cpp index c06a28b..423aaf7 100644 --- a/plocate.cpp +++ b/plocate.cpp @@ -55,6 +55,7 @@ bool use_extended_regex = false; bool match_basename = false; int64_t limit_matches = numeric_limits<int64_t>::max(); int64_t limit_left = numeric_limits<int64_t>::max(); +bool stdout_is_tty = false; steady_clock::time_point start; ZSTD_DDict *ddict = nullptr; @@ -767,6 +768,10 @@ int main(int argc, char **argv) } } + if (!print_nul) { + stdout_is_tty = isatty(1); + } + vector<Needle> needles; for (int i = optind; i < argc; ++i) { Needle needle; diff --git a/serializer.cpp b/serializer.cpp index 10a1564..f20c304 100644 --- a/serializer.cpp +++ b/serializer.cpp @@ -27,6 +27,121 @@ void apply_limit() exit(0); } +void print_possibly_escaped(const string &str) +{ + if (print_nul) { + printf("%s%c", str.c_str(), 0); + return; + } else if (!stdout_is_tty) { + printf("%s\n", str.c_str()); + return; + } + + // stdout is a terminal, so we should protect the user against + // escapes, stray newlines and the likes. First of all, check if + // all the characters are safe; we consider everything safe that + // isn't a control character, ', " or \. People could make + // filenames like "$(rm -rf)", but that's out-of-scope. + const char *ptr = str.data(); + size_t len = str.size(); + + mbtowc(nullptr, 0, 0); + wchar_t pwc; + bool all_safe = true; + do { + int ret = mbtowc(&pwc, ptr, len); + if (ret == -1) { + all_safe = false; // Malformed data. + } else if (ret == 0) { + break; // EOF. + } else if (pwc < 32 || pwc == '\'' || pwc == '"' || pwc == '\\') { + all_safe = false; + } else { + ptr += ret; + len -= ret; + } + } while (all_safe); + + if (all_safe) { + printf("%s\n", str.c_str()); + return; + } + + // Print escaped, but in such a way that the user can easily take the + // escaped output and paste into the shell. We print much like GNU ls does, + // ie., using the shell $'foo' construct whenever we need to print something + // escaped. 
+ bool in_escaped_mode = false; + printf("'"); + + mbtowc(nullptr, 0, 0); + ptr = str.data(); + len = str.size(); + for (;;) { + int ret = mbtowc(nullptr, ptr, len); + if (ret == -1) { + // Malformed data. + printf("?"); + ++ptr; + --len; + } else if (ret == 0) { + break; // EOF. + } + if (*ptr < 32 || *ptr == '\'' || *ptr == '"' || *ptr == '\\') { + if (!in_escaped_mode) { + printf("'$'"); + in_escaped_mode = true; + } + + // The list of allowed escapes is from bash(1). + switch (*ptr) { + case '\a': + printf("\\a"); + break; + case '\b': + printf("\\b"); + break; + case '\f': + printf("\\f"); + break; + case '\n': + printf("\\n"); + break; + case '\r': + printf("\\r"); + break; + case '\t': + printf("\\t"); + break; + case '\v': + printf("\\v"); + break; + case '\\': + printf("\\\\"); + break; + case '\'': + printf("\\'"); + break; + case '"': + printf("\\\""); + break; + default: + printf("\\%03o", *ptr); + break; + } + } else { + if (in_escaped_mode) { + printf("''"); + in_escaped_mode = false; + } + fwrite(ptr, ret, 1, stdout); + } + ptr += ret; + len -= ret; + } + printf("'\n"); +} + void Serializer::print(uint64_t seq, uint64_t skip, const string msg) { if (only_count) { @@ -42,11 +157,7 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) } if (!msg.empty()) { - if (print_nul) { - printf("%s%c", msg.c_str(), 0); - } else { - printf("%s\n", msg.c_str()); - } + print_possibly_escaped(msg); apply_limit(); } next_seq += skip; @@ -54,11 +165,7 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) // See if any delayed prints can now be dealt with. while (!pending.empty() && pending.top().seq == next_seq) { if (!pending.top().msg.empty()) { - if (print_nul) { - printf("%s%c", pending.top().msg.c_str(), 0); - } else { - printf("%s\n", pending.top().msg.c_str()); - } + print_possibly_escaped(pending.top().msg); apply_limit(); } next_seq += pending.top().skip; -- 2.39.2