X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=serializer.cpp;h=f20c304de54e690a5c41852127da93b6b8c3c2bb;hb=cbecd483f57c465b2ad6d3867c760c2e5b5e79aa;hp=10a156411b5e551cec7e51be66907fef545a80c0;hpb=93d57f8f19e57efbb91e139bfca1064bd9e27bb3;p=plocate diff --git a/serializer.cpp b/serializer.cpp index 10a1564..f20c304 100644 --- a/serializer.cpp +++ b/serializer.cpp @@ -27,6 +27,121 @@ void apply_limit() exit(0); } +void print_possibly_escaped(const string &str) +{ + if (print_nul) { + printf("%s%c", str.c_str(), 0); + return; + } else if (!stdout_is_tty) { + printf("%s\n", str.c_str()); + return; + } + + // stdout is a terminal, so we should protect the user against + // escapes, stray newlines and the likes. First of all, check if + // all the characters are safe; we consider everything safe that + // isn't a control character, ', " or \. People could make + // filenames like "$(rm -rf)", but that's out-of-scope. + const char *ptr = str.data(); + size_t len = str.size(); + + mbtowc(nullptr, 0, 0); + wchar_t pwc; + bool all_safe = true; + do { + int ret = mbtowc(&pwc, ptr, len); + if (ret == -1) { + all_safe = false; // Malformed data. + } else if (ret == 0) { + break; // EOF. + } else if (pwc < 32 || pwc == '\'' || pwc == '"' || pwc == '\\') { + all_safe = false; + } else { + ptr += ret; + len -= ret; + } + } while (all_safe); + + if (all_safe) { + printf("%s\n", str.c_str()); + return; + } + + // Print escaped, but in such a way that the user can easily take the + // escaped output and paste into the shell. We print much like GNU ls does, + // ie., using the shell $'foo' construct whenever we need to print something + // escaped. + bool in_escaped_mode = false; + printf("'"); + + mbtowc(nullptr, 0, 0); + ptr = str.data(); + len = str.size(); + for (;;) { + int ret = mbtowc(nullptr, ptr, len); + if (ret == -1) { + // Malformed data. + printf("?"); + ++ptr; + --len; + } else if (ret == 0) { + break; // EOF. + } + if (*ptr < 32 || *ptr == '\'' || *ptr == '"' || *ptr == '\\') { + if (!in_escaped_mode) { + printf("'$'"); + in_escaped_mode = true; + } + + // The list of allowed escapes is from bash(1). + switch (*ptr) { + case '\a': + printf("\\a"); + break; + case '\b': + printf("\\b"); + break; + case '\f': + printf("\\f"); + break; + case '\n': + printf("\\n"); + break; + case '\r': + printf("\\r"); + break; + case '\t': + printf("\\t"); + break; + case '\v': + printf("\\v"); + break; + case '\\': + printf("\\\\"); + break; + case '\'': + printf("\\'"); + break; + case '"': + printf("\\\""); + break; + default: + printf("\\%03o", *ptr); + break; + } + } else { + if (in_escaped_mode) { + printf("''"); + in_escaped_mode = false; + } + fwrite(ptr, ret, 1, stdout); + } + ptr += ret; + len -= ret; + } + printf("'\n"); +} + void Serializer::print(uint64_t seq, uint64_t skip, const string msg) { if (only_count) { @@ -42,11 +157,7 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) } if (!msg.empty()) { - if (print_nul) { - printf("%s%c", msg.c_str(), 0); - } else { - printf("%s\n", msg.c_str()); - } + print_possibly_escaped(msg); apply_limit(); } next_seq += skip; @@ -54,11 +165,7 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) // See if any delayed prints can now be dealt with. while (!pending.empty() && pending.top().seq == next_seq) { if (!pending.top().msg.empty()) { - if (print_nul) { - printf("%s%c", pending.top().msg.c_str(), 0); - } else { - printf("%s\n", pending.top().msg.c_str()); - } + print_possibly_escaped(pending.top().msg); apply_limit(); } next_seq += pending.top().skip;