X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=serializer.cpp;h=356094fde659f86715a730d76cf5801135782af0;hb=fd6198891d6fd9642effc0843fef6f23b991af3e;hp=9277125927b1681c501fadc8c2cdaff5a4db897a;hpb=4ecef43ea601ab99ad2f2cf7715dbc52e8f3c775;p=plocate diff --git a/serializer.cpp b/serializer.cpp index 9277125..356094f 100644 --- a/serializer.cpp +++ b/serializer.cpp @@ -1,13 +1,14 @@ +#include "serializer.h" + +#include "dprintf.h" + +#include #include +#include #include #include -#include -#include #include -#include "dprintf.h" -#include "serializer.h" - using namespace std; using namespace std::chrono; @@ -26,6 +27,125 @@ void apply_limit() exit(0); } +void print_possibly_escaped(const string &str) +{ + if (print_nul) { + printf("%s%c", str.c_str(), 0); + return; + } else if (literal_printing || !stdout_is_tty) { + printf("%s\n", str.c_str()); + return; + } + + // stdout is a terminal, so we should protect the user against + // escapes, stray newlines and the likes. First of all, check if + // all the characters are safe; we consider everything safe that + // isn't a control character, ', " or \. People could make + // filenames like "$(rm -rf)", but that's out-of-scope. + const char *ptr = str.data(); + size_t len = str.size(); + + mbtowc(nullptr, 0, 0); + wchar_t pwc; + bool all_safe = true; + do { + int ret = mbtowc(&pwc, ptr, len); + if (ret == -1) { + all_safe = false; // Malformed data. + } else if (ret == 0) { + break; // EOF. + } else if (pwc < 32 || pwc == '\'' || pwc == '"' || pwc == '\\') { + all_safe = false; + } else if (pwc == '`') { + // A rather odd case; ls quotes this but does not escape it. + all_safe = false; + } else { + ptr += ret; + len -= ret; + } + } while (all_safe && *ptr != '\0'); + + if (all_safe) { + printf("%s\n", str.c_str()); + return; + } + + // Print escaped, but in such a way that the user can easily take the + // escaped output and paste into the shell. We print much like GNU ls does, + // ie., using the shell $'foo' construct whenever we need to print something + // escaped. + bool in_escaped_mode = false; + printf("'"); + + mbtowc(nullptr, 0, 0); + ptr = str.data(); + len = str.size(); + while (*ptr != '\0') { + int ret = mbtowc(nullptr, ptr, len); + if (ret == -1) { + // Malformed data. + printf("?"); + ++ptr; + --len; + continue; + } else if (ret == 0) { + break; // EOF. + } + if ((unsigned char)*ptr < 32 || *ptr == '\'' || *ptr == '"' || *ptr == '\\') { + if (!in_escaped_mode) { + printf("'$'"); + in_escaped_mode = true; + } + + // The list of allowed escapes is from bash(1). + switch (*ptr) { + case '\a': + printf("\\a"); + break; + case '\b': + printf("\\b"); + break; + case '\f': + printf("\\f"); + break; + case '\n': + printf("\\n"); + break; + case '\r': + printf("\\r"); + break; + case '\t': + printf("\\t"); + break; + case '\v': + printf("\\v"); + break; + case '\\': + printf("\\\\"); + break; + case '\'': + printf("\\'"); + break; + case '"': + printf("\\\""); + break; + default: + printf("\\%03o", *ptr); + break; + } + } else { + if (in_escaped_mode) { + printf("''"); + in_escaped_mode = false; + } + fwrite(ptr, ret, 1, stdout); + } + ptr += ret; + len -= ret; + } + printf("'\n"); +} + void Serializer::print(uint64_t seq, uint64_t skip, const string msg) { if (only_count) { @@ -41,11 +161,7 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) } if (!msg.empty()) { - if (print_nul) { - printf("%s%c", msg.c_str(), 0); - } else { - printf("%s\n", msg.c_str()); - } + print_possibly_escaped(msg); apply_limit(); } next_seq += skip; @@ -53,15 +169,10 @@ void Serializer::print(uint64_t seq, uint64_t skip, const string msg) // See if any delayed prints can now be dealt with. while (!pending.empty() && pending.top().seq == next_seq) { if (!pending.top().msg.empty()) { - if (print_nul) { - printf("%s%c", pending.top().msg.c_str(), 0); - } else { - printf("%s\n", pending.top().msg.c_str()); - } + print_possibly_escaped(pending.top().msg); apply_limit(); } next_seq += pending.top().skip; pending.pop(); } } -