Parse and log referer and user-agent headers in requests.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Wed, 22 Jul 2015 13:05:14 +0000 (15:05 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Wed, 22 Jul 2015 13:05:14 +0000 (15:05 +0200)
accesslog.cpp
client.cpp
client.h
server.cpp
state.proto
stats.cpp

index 4414650..3e1ebe5 100644 (file)
@@ -58,14 +58,16 @@ void AccessLogThread::do_work()
                                log_perror("clock_gettime(CLOCK_MONOTONIC_COARSE)");
                        } else {
                                for (size_t i = 0; i < writes.size(); ++i) {
-                                       fprintf(logfp, "%llu %s %s %d %llu %llu %llu\n",
+                                       fprintf(logfp, "%llu %s %s %d %llu %llu %llu \"%s\" \"%s\"\n",
                                                (long long unsigned)(writes[i].connect_time.tv_sec),
                                                writes[i].remote_addr.c_str(),
                                                writes[i].url.c_str(),
                                                int(now.tv_sec - writes[i].connect_time.tv_sec),  // Rather coarse.
                                                (long long unsigned)(writes[i].bytes_sent),
                                                (long long unsigned)(writes[i].bytes_lost),
-                                               (long long unsigned)(writes[i].num_loss_events));
+                                               (long long unsigned)(writes[i].num_loss_events),
+                                               writes[i].referer.c_str(),
+                                               writes[i].user_agent.c_str());
                                }
                                fflush(logfp);
                        }
index f191a40..786e178 100644 (file)
@@ -1,3 +1,4 @@
+#include <stdio.h>
 #include <arpa/inet.h>
 #include <netinet/in.h>
 #include <stdint.h>
@@ -63,6 +64,8 @@ Client::Client(int sock)
 Client::Client(const ClientProto &serialized, Stream *stream)
        : sock(serialized.sock()),
          remote_addr(serialized.remote_addr()),
+         referer(serialized.referer()),
+         user_agent(serialized.user_agent()),
          state(State(serialized.state())),
          request(serialized.request()),
          url(serialized.url()),
@@ -90,6 +93,8 @@ ClientProto Client::serialize() const
        ClientProto serialized;
        serialized.set_sock(sock);
        serialized.set_remote_addr(remote_addr);
+       serialized.set_referer(referer);
+       serialized.set_user_agent(user_agent);
        serialized.set_connect_time_sec(connect_time.tv_sec);
        serialized.set_connect_time_nsec(connect_time.tv_nsec);
        serialized.set_state(state);
@@ -103,6 +108,24 @@ ClientProto Client::serialize() const
        serialized.set_num_loss_events(num_loss_events);
        return serialized;
 }
+
+namespace {
+
+// Hex-escapes (as \xNN) every byte except printable ASCII other than '"' and '\'.
+string escape_string(const string &str) {
+       string ret;
+       for (size_t i = 0; i < str.size(); ++i) {
+               const unsigned char ch = str[i];  // Unsigned: isprint() on a negative char is undefined.
+               char buf[16] = { char(ch), '\0' };  // Default: the byte itself, unescaped.
+               if (ch < 0x20 || ch > 0x7e || ch == '"' || ch == '\\') {
+                       snprintf(buf, sizeof(buf), "\\x%02x", ch);
+               }
+               ret += buf;
+       }
+       return ret;
+}
+
+} // namespace
        
 ClientStats Client::get_stats() const
 {
@@ -114,6 +137,8 @@ ClientStats Client::get_stats() const
        }
        stats.sock = sock;
        stats.remote_addr = remote_addr;
+       stats.referer = escape_string(referer);
+       stats.user_agent = escape_string(user_agent);
        stats.connect_time = connect_time;
        stats.bytes_sent = bytes_sent;
        stats.bytes_lost = bytes_lost;
index 4e80031..954ad41 100644 (file)
--- a/client.h
+++ b/client.h
@@ -11,10 +11,13 @@ class ClientProto;
 struct Stream;
 
 // Digested statistics for writing to logs etc.
+// Note that "referer" and "user_agent" here are already escaped for scary characters.
 struct ClientStats {
        std::string url;
        int sock;
        std::string remote_addr;
+       std::string referer;
+       std::string user_agent;
        timespec connect_time;
        size_t bytes_sent;
        size_t bytes_lost;
@@ -38,6 +41,8 @@ struct Client {
 
        // Some information only used for logging.
        std::string remote_addr;
+       std::string referer;
+       std::string user_agent;
 
        enum State { READING_REQUEST, SENDING_HEADER, SENDING_DATA, SENDING_ERROR, WAITING_FOR_KEYFRAME, PREBUFFERING };
        State state;
index 079b629..2f7a332 100644 (file)
@@ -571,6 +571,18 @@ int Server::parse_request(Client *client)
                return 400;  // Bad request (empty).
        }
 
+       // Parse the headers, for logging purposes.
+       // TODO: Case-insensitivity.
+       multimap<string, string> headers = extract_headers(lines, client->remote_addr);
+       multimap<string, string>::const_iterator referer_it = headers.find("Referer");
+       if (referer_it != headers.end()) {
+               client->referer = referer_it->second;
+       }
+       multimap<string, string>::const_iterator user_agent_it = headers.find("User-Agent");
+       if (user_agent_it != headers.end()) {
+               client->user_agent = user_agent_it->second;
+       }
+
        vector<string> request_tokens = split_tokens(lines[0]);
        if (request_tokens.size() < 2) {
                return 400;  // Bad request (empty).
@@ -598,6 +610,7 @@ int Server::parse_request(Client *client)
        }
 
        client->url = request_tokens[1];
+
        client->stream = stream;
        if (setsockopt(client->sock, SOL_SOCKET, SO_MAX_PACING_RATE, &client->stream->pacing_rate, sizeof(client->stream->pacing_rate)) == -1) {
                if (client->stream->pacing_rate != ~0U) {
index 30b63da..3a61108 100644 (file)
@@ -14,6 +14,8 @@ message ClientProto {
        optional int64 bytes_sent = 10;
        optional int64 bytes_lost = 11;
        optional int64 num_loss_events = 12;
+       optional bytes referer = 15;
+       optional bytes user_agent = 16;
 };
 
 // Corresponds to struct Stream.
index b0765ed..5cfa669 100644 (file)
--- a/stats.cpp
+++ b/stats.cpp
@@ -59,7 +59,7 @@ void StatsThread::do_work()
 
                client_stats = servers->get_client_stats();
                for (size_t i = 0; i < client_stats.size(); ++i) {
-                       fprintf(fp, "%s %d %d %s %d %llu %llu %llu\n",
+                       fprintf(fp, "%s %d %d %s %d %llu %llu %llu \"%s\" \"%s\"\n",
                                client_stats[i].remote_addr.c_str(),
                                client_stats[i].sock,
                                0,  // Used to be fwmark.
@@ -67,7 +67,9 @@ void StatsThread::do_work()
                                int(now.tv_sec - client_stats[i].connect_time.tv_sec),  // Rather coarse.
                                (long long unsigned)(client_stats[i].bytes_sent),
                                (long long unsigned)(client_stats[i].bytes_lost),
-                               (long long unsigned)(client_stats[i].num_loss_events));
+                               (long long unsigned)(client_stats[i].num_loss_events),
+                               client_stats[i].referer.c_str(),
+                               client_stats[i].user_agent.c_str());
                }
                if (fclose(fp) == EOF) {
                        log_perror("fclose");