From: Steinar H. Gunderson Date: Wed, 22 Jul 2015 13:05:14 +0000 (+0200) Subject: Parse and log referer and user-agent headers in requests. X-Git-Tag: 1.2.0~16 X-Git-Url: https://git.sesse.net/?p=cubemap;a=commitdiff_plain;h=207ca0494024641f27537ad3cf047814d8092678 Parse and log referer and user-agent headers in requests. --- diff --git a/accesslog.cpp b/accesslog.cpp index 4414650..3e1ebe5 100644 --- a/accesslog.cpp +++ b/accesslog.cpp @@ -58,14 +58,16 @@ void AccessLogThread::do_work() log_perror("clock_gettime(CLOCK_MONOTONIC_COARSE)"); } else { for (size_t i = 0; i < writes.size(); ++i) { - fprintf(logfp, "%llu %s %s %d %llu %llu %llu\n", + fprintf(logfp, "%llu %s %s %d %llu %llu %llu \"%s\" \"%s\"\n", (long long unsigned)(writes[i].connect_time.tv_sec), writes[i].remote_addr.c_str(), writes[i].url.c_str(), int(now.tv_sec - writes[i].connect_time.tv_sec), // Rather coarse. (long long unsigned)(writes[i].bytes_sent), (long long unsigned)(writes[i].bytes_lost), - (long long unsigned)(writes[i].num_loss_events)); + (long long unsigned)(writes[i].num_loss_events), + writes[i].referer.c_str(), + writes[i].user_agent.c_str()); } fflush(logfp); } diff --git a/client.cpp b/client.cpp index f191a40..786e178 100644 --- a/client.cpp +++ b/client.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -63,6 +64,8 @@ Client::Client(int sock) Client::Client(const ClientProto &serialized, Stream *stream) : sock(serialized.sock()), remote_addr(serialized.remote_addr()), + referer(serialized.referer()), + user_agent(serialized.user_agent()), state(State(serialized.state())), request(serialized.request()), url(serialized.url()), @@ -90,6 +93,8 @@ ClientProto Client::serialize() const ClientProto serialized; serialized.set_sock(sock); serialized.set_remote_addr(remote_addr); + serialized.set_referer(referer); + serialized.set_user_agent(user_agent); serialized.set_connect_time_sec(connect_time.tv_sec); serialized.set_connect_time_nsec(connect_time.tv_nsec); 
serialized.set_state(state); @@ -103,6 +108,24 @@ ClientProto Client::serialize() const serialized.set_num_loss_events(num_loss_events); return serialized; } + +namespace { + +string escape_string(const string &str) { + string ret; + for (size_t i = 0; i < str.size(); ++i) { + char buf[16]; + if (isprint(str[i]) && str[i] >= 32 && str[i] != '"' && str[i] != '\\') { + ret.push_back(str[i]); + } else { + snprintf(buf, sizeof(buf), "\\x%02x", (unsigned char)str[i]); + ret += buf; + } + } + return ret; +} + +} // namespace ClientStats Client::get_stats() const { @@ -114,6 +137,8 @@ ClientStats Client::get_stats() const } stats.sock = sock; stats.remote_addr = remote_addr; + stats.referer = escape_string(referer); + stats.user_agent = escape_string(user_agent); stats.connect_time = connect_time; stats.bytes_sent = bytes_sent; stats.bytes_lost = bytes_lost; diff --git a/client.h b/client.h index 4e80031..954ad41 100644 --- a/client.h +++ b/client.h @@ -11,10 +11,13 @@ class ClientProto; struct Stream; // Digested statistics for writing to logs etc. +// Note that "referer" and "user_agent" here are already escaped for scary characters. struct ClientStats { std::string url; int sock; std::string remote_addr; + std::string referer; + std::string user_agent; timespec connect_time; size_t bytes_sent; size_t bytes_lost; @@ -38,6 +41,8 @@ struct Client { // Some information only used for logging. std::string remote_addr; + std::string referer; + std::string user_agent; enum State { READING_REQUEST, SENDING_HEADER, SENDING_DATA, SENDING_ERROR, WAITING_FOR_KEYFRAME, PREBUFFERING }; State state; diff --git a/server.cpp b/server.cpp index 079b629..2f7a332 100644 --- a/server.cpp +++ b/server.cpp @@ -571,6 +571,18 @@ int Server::parse_request(Client *client) return 400; // Bad request (empty). } + // Parse the headers, for logging purposes. + // TODO: Case-insensitivity. 
+ multimap<string, string> headers = extract_headers(lines, client->remote_addr); + multimap<string, string>::const_iterator referer_it = headers.find("Referer"); + if (referer_it != headers.end()) { + client->referer = referer_it->second; + } + multimap<string, string>::const_iterator user_agent_it = headers.find("User-Agent"); + if (user_agent_it != headers.end()) { + client->user_agent = user_agent_it->second; + } + vector<string> request_tokens = split_tokens(lines[0]); if (request_tokens.size() < 2) { return 400; // Bad request (empty). @@ -598,6 +610,7 @@ int Server::parse_request(Client *client) } client->url = request_tokens[1]; + client->stream = stream; if (setsockopt(client->sock, SOL_SOCKET, SO_MAX_PACING_RATE, &client->stream->pacing_rate, sizeof(client->stream->pacing_rate)) == -1) { if (client->stream->pacing_rate != ~0U) { diff --git a/state.proto b/state.proto index 30b63da..3a61108 100644 --- a/state.proto +++ b/state.proto @@ -14,6 +14,8 @@ message ClientProto { optional int64 bytes_sent = 10; optional int64 bytes_lost = 11; optional int64 num_loss_events = 12; + optional bytes referer = 15; + optional bytes user_agent = 16; }; // Corresponds to struct Stream. diff --git a/stats.cpp b/stats.cpp index b0765ed..5cfa669 100644 --- a/stats.cpp +++ b/stats.cpp @@ -59,7 +59,7 @@ void StatsThread::do_work() client_stats = servers->get_client_stats(); for (size_t i = 0; i < client_stats.size(); ++i) { - fprintf(fp, "%s %d %d %s %d %llu %llu %llu\n", + fprintf(fp, "%s %d %d %s %d %llu %llu %llu \"%s\" \"%s\"\n", client_stats[i].remote_addr.c_str(), client_stats[i].sock, 0, // Used to be fwmark. @@ -67,7 +67,9 @@ void StatsThread::do_work() int(now.tv_sec - client_stats[i].connect_time.tv_sec), // Rather coarse. 
(long long unsigned)(client_stats[i].bytes_sent), (long long unsigned)(client_stats[i].bytes_lost), - (long long unsigned)(client_stats[i].num_loss_events)); + (long long unsigned)(client_stats[i].num_loss_events), + client_stats[i].referer.c_str(), + client_stats[i].user_agent.c_str()); } if (fclose(fp) == EOF) { log_perror("fclose");