git.sesse.net Git - cubemap/blobdiff - server.cpp
Add support for raw (non-Metacube) inputs over HTTP. Only really useful for TS.
[cubemap] / server.cpp
index 64819f447ffeec0fbf6de8187e2de1d7705c240c..d3b936f0b9c1f869c34c5dbf05c1beb32ad90a8e 100644 (file)
@@ -1,54 +1,76 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <unistd.h>
 #include <assert.h>
-#include <arpa/inet.h>
-#include <sys/socket.h>
+#include <errno.h>
+#include <netinet/in.h>
 #include <pthread.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <sys/epoll.h>
 #include <sys/sendfile.h>
-#include <time.h>
-#include <signal.h>
-#include <errno.h>
-#include <vector>
-#include <string>
-#include <map>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
 #include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
 
-#include "markpool.h"
+#include "accesslog.h"
+#include "log.h"
+#include "metacube2.h"
+#include "mutexlock.h"
 #include "parse.h"
 #include "server.h"
-#include "stream.h"
-#include "mutexlock.h"
 #include "state.pb.h"
+#include "stream.h"
+#include "util.h"
+
+#ifndef SO_MAX_PACING_RATE
+#define SO_MAX_PACING_RATE 47
+#endif
 
 using namespace std;
 
+extern AccessLogThread *access_log;
+
+namespace {
+
+inline bool is_equal(timespec a, timespec b)
+{
+       return a.tv_sec == b.tv_sec &&
+              a.tv_nsec == b.tv_nsec;
+}
+
+inline bool is_earlier(timespec a, timespec b)
+{
+       if (a.tv_sec != b.tv_sec)
+               return a.tv_sec < b.tv_sec;
+       return a.tv_nsec < b.tv_nsec;
+}
+
+}  // namespace
+
 Server::Server()
 {
        pthread_mutex_init(&mutex, NULL);
-       pthread_mutex_init(&queued_data_mutex, NULL);
+       pthread_mutex_init(&queued_clients_mutex, NULL);
 
        epoll_fd = epoll_create(1024);  // Size argument is ignored.
        if (epoll_fd == -1) {
-               perror("epoll_fd");
+               log_perror("epoll_fd");
                exit(1);
        }
 }
 
 Server::~Server()
 {
-       int ret;
-       do {
-               ret = close(epoll_fd);
-       } while (ret == -1 && errno == EINTR);
-
-       if (ret == -1) {
-               perror("close(epoll_fd)");
+       for (size_t i = 0; i < streams.size(); ++i) {   
+               delete streams[i];
        }
+
+       safe_close(epoll_fd);
 }
 
 vector<ClientStats> Server::get_client_stats() const
@@ -66,16 +88,18 @@ vector<ClientStats> Server::get_client_stats() const
 
 void Server::do_work()
 {
-       for ( ;; ) {
-               int nfds = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, EPOLL_TIMEOUT_MS);
-               if (nfds == -1 && errno == EINTR) {
-                       if (should_stop) {
-                               return;
-                       }
-                       continue;
-               }
-               if (nfds == -1) {
-                       perror("epoll_wait");
+       while (!should_stop()) {
+               // Wait until there's activity on at least one of the fds,
+               // or 20 ms (about one frame at 50 fps) has elapsed.
+               //
+               // We could in theory wait forever and rely on wakeup()
+               // from add_client_deferred() and add_data_deferred(),
+               // but wakeup is a pretty expensive operation, and the
+               // two threads might end up fighting over a lock, so it's
+               // seemingly (much) more efficient to just have a timeout here.
+               int nfds = epoll_pwait(epoll_fd, events, EPOLL_MAX_EVENTS, EPOLL_TIMEOUT_MS, &sigset_without_usr1_block);
+               if (nfds == -1 && errno != EINTR) {
+                       log_perror("epoll_wait");
                        exit(1);
                }
 
@@ -83,10 +107,9 @@ void Server::do_work()
        
                process_queued_data();
 
+               // Process each client where we have socket activity.
                for (int i = 0; i < nfds; ++i) {
-                       int fd = events[i].data.fd;
-                       assert(clients.count(fd) != 0);
-                       Client *client = &clients[fd];
+                       Client *client = reinterpret_cast<Client *>(events[i].data.u64);
 
                        if (events[i].events & (EPOLLERR | EPOLLRDHUP | EPOLLHUP)) {
                                close_client(client);
@@ -96,18 +119,57 @@ void Server::do_work()
                        process_client(client);
                }
 
-               for (map<string, Stream *>::iterator stream_it = streams.begin();
-                    stream_it != streams.end();
-                    ++stream_it) {
+               // Process each client where its stream has new data,
+               // even if there was no socket activity.
+               for (size_t i = 0; i < streams.size(); ++i) {   
                        vector<Client *> to_process;
-                       swap(stream_it->second->to_process, to_process);
+                       swap(streams[i]->to_process, to_process);
                        for (size_t i = 0; i < to_process.size(); ++i) {
                                process_client(to_process[i]);
                        }
                }
 
-               if (should_stop) {
-                       return;
+               // Finally, go through each client to see if it's timed out
+               // in the READING_REQUEST state. (Seemingly there are clients
+               // that can hold sockets up for days at a time without sending
+               // anything at all.)
+               timespec timeout_time;
+               if (clock_gettime(CLOCK_MONOTONIC_COARSE, &timeout_time) == -1) {
+                       log_perror("clock_gettime(CLOCK_MONOTONIC_COARSE)");
+                       continue;
+               }
+               timeout_time.tv_sec -= REQUEST_READ_TIMEOUT_SEC;
+               while (!clients_ordered_by_connect_time.empty()) {
+                       const pair<timespec, int> &connect_time_and_fd = clients_ordered_by_connect_time.front();
+
+                       // See if we have reached the end of clients to process.
+                       if (is_earlier(timeout_time, connect_time_and_fd.first)) {
+                               break;
+                       }
+
+                       // If this client doesn't exist anymore, just ignore it
+                       // (it was deleted earlier).
+                       map<int, Client>::iterator client_it = clients.find(connect_time_and_fd.second);
+                       if (client_it == clients.end()) {
+                               clients_ordered_by_connect_time.pop();
+                               continue;
+                       }
+                       Client *client = &client_it->second;
+                       if (!is_equal(client->connect_time, connect_time_and_fd.first)) {
+                               // Another client has taken this fd in the meantime.
+                               clients_ordered_by_connect_time.pop();
+                               continue;
+                       }
+
+                       if (client->state != Client::READING_REQUEST) {
+                               // Only READING_REQUEST can time out.
+                               clients_ordered_by_connect_time.pop();
+                               continue;
+                       }
+
+                       // OK, it timed out.
+                       close_client(client);
+                       clients_ordered_by_connect_time.pop();
                }
        }
 }
@@ -117,49 +179,87 @@ CubemapStateProto Server::serialize()
        // We don't serialize anything queued, so empty the queues.
        process_queued_data();
 
+       // Set all clients in a consistent state before serializing
+       // (ie., they have no remaining lost data). Otherwise, increasing
+       // the backlog could take clients into a newly valid area of the backlog,
+       // sending a stream of zeros instead of skipping the data as it should.
+       //
+       // TODO: Do this when clients are added back from serialized state instead;
+       // it would probably be less wasteful.
+       for (map<int, Client>::iterator client_it = clients.begin();
+            client_it != clients.end();
+            ++client_it) {
+               skip_lost_data(&client_it->second);
+       }
+
        CubemapStateProto serialized;
        for (map<int, Client>::const_iterator client_it = clients.begin();
             client_it != clients.end();
             ++client_it) {
                serialized.add_clients()->MergeFrom(client_it->second.serialize());
        }
-       for (map<string, Stream *>::const_iterator stream_it = streams.begin();
-            stream_it != streams.end();
-            ++stream_it) {
-               serialized.add_streams()->MergeFrom(stream_it->second->serialize());
+       for (size_t i = 0; i < streams.size(); ++i) {   
+               serialized.add_streams()->MergeFrom(streams[i]->serialize());
        }
        return serialized;
 }
 
 void Server::add_client_deferred(int sock)
 {
-       MutexLock lock(&queued_data_mutex);
+       MutexLock lock(&queued_clients_mutex);
        queued_add_clients.push_back(sock);
 }
 
 void Server::add_client(int sock)
 {
-       clients.insert(make_pair(sock, Client(sock)));
+       pair<map<int, Client>::iterator, bool> ret =
+               clients.insert(make_pair(sock, Client(sock)));
+       assert(ret.second == true);  // Should not already exist.
+       Client *client_ptr = &ret.first->second;
+
+       // Connection timestamps must be nondecreasing. I can't find any guarantee
+       // that even the monotonic clock can't go backwards by a small amount
+       // (think switching between CPUs with non-synchronized TSCs), so if
+       // this actually should happen, we hack around it by fudging
+       // connect_time.
+       if (!clients_ordered_by_connect_time.empty() &&
+           is_earlier(client_ptr->connect_time, clients_ordered_by_connect_time.back().first)) {
+               client_ptr->connect_time = clients_ordered_by_connect_time.back().first;
+       }
+       clients_ordered_by_connect_time.push(make_pair(client_ptr->connect_time, sock));
 
        // Start listening on data from this socket.
        epoll_event ev;
        ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
-       ev.data.u64 = 0;  // Keep Valgrind happy.
-       ev.data.fd = sock;
+       ev.data.u64 = reinterpret_cast<uint64_t>(client_ptr);
        if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev) == -1) {
-               perror("epoll_ctl(EPOLL_CTL_ADD)");
+               log_perror("epoll_ctl(EPOLL_CTL_ADD)");
                exit(1);
        }
 
-       process_client(&clients[sock]);
+       process_client(client_ptr);
 }
 
 void Server::add_client_from_serialized(const ClientProto &client)
 {
        MutexLock lock(&mutex);
-       Stream *stream = find_stream(client.stream_id());
-       clients.insert(make_pair(client.sock(), Client(client, stream)));
-       Client *client_ptr = &clients[client.sock()];
+       Stream *stream;
+       int stream_index = lookup_stream_by_url(client.url());
+       if (stream_index == -1) {
+               assert(client.state() != Client::SENDING_DATA);
+               stream = NULL;
+       } else {
+               stream = streams[stream_index];
+       }
+       pair<map<int, Client>::iterator, bool> ret =
+               clients.insert(make_pair(client.sock(), Client(client, stream)));
+       assert(ret.second == true);  // Should not already exist.
+       Client *client_ptr = &ret.first->second;
+
+       // Connection timestamps must be nondecreasing.
+       assert(clients_ordered_by_connect_time.empty() ||
+              !is_earlier(client_ptr->connect_time, clients_ordered_by_connect_time.back().first));
+       clients_ordered_by_connect_time.push(make_pair(client_ptr->connect_time, client.sock()));
 
        // Start listening on data from this socket.
        epoll_event ev;
@@ -170,106 +270,113 @@ void Server::add_client_from_serialized(const ClientProto &client)
                // the sleeping array again soon.
                ev.events = EPOLLOUT | EPOLLET | EPOLLRDHUP;
        }
-       ev.data.u64 = 0;  // Keep Valgrind happy.
-       ev.data.fd = client.sock();
+       ev.data.u64 = reinterpret_cast<uint64_t>(client_ptr);
        if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client.sock(), &ev) == -1) {
-               perror("epoll_ctl(EPOLL_CTL_ADD)");
+               log_perror("epoll_ctl(EPOLL_CTL_ADD)");
                exit(1);
        }
 
-       if (client_ptr->state == Client::SENDING_DATA && 
-           client_ptr->bytes_sent == client_ptr->stream->bytes_received) {
+       if (client_ptr->state == Client::WAITING_FOR_KEYFRAME ||
+           client_ptr->state == Client::PREBUFFERING ||
+           (client_ptr->state == Client::SENDING_DATA &&
+            client_ptr->stream_pos == client_ptr->stream->bytes_received)) {
                client_ptr->stream->put_client_to_sleep(client_ptr);
        } else {
                process_client(client_ptr);
        }
 }
 
-void Server::add_stream(const string &stream_id, size_t backlog_size)
+int Server::lookup_stream_by_url(const string &url) const
+{
+       map<string, int>::const_iterator stream_url_it = stream_url_map.find(url);
+       if (stream_url_it == stream_url_map.end()) {
+               return -1;
+       }
+       return stream_url_it->second;
+}
+
+int Server::add_stream(const string &url, size_t backlog_size, size_t prebuffering_bytes, Stream::Encoding encoding, Stream::Encoding src_encoding)
 {
        MutexLock lock(&mutex);
-       streams.insert(make_pair(stream_id, new Stream(stream_id, backlog_size)));
+       stream_url_map.insert(make_pair(url, streams.size()));
+       streams.push_back(new Stream(url, backlog_size, prebuffering_bytes, encoding, src_encoding));
+       return streams.size() - 1;
 }
 
-void Server::add_stream_from_serialized(const StreamProto &stream)
+int Server::add_stream_from_serialized(const StreamProto &stream, int data_fd)
 {
        MutexLock lock(&mutex);
-       streams.insert(make_pair(stream.stream_id(), new Stream(stream)));
+       stream_url_map.insert(make_pair(stream.url(), streams.size()));
+       streams.push_back(new Stream(stream, data_fd));
+       return streams.size() - 1;
 }
        
-void Server::set_header(const string &stream_id, const string &header)
+void Server::set_backlog_size(int stream_index, size_t new_size)
 {
        MutexLock lock(&mutex);
-       find_stream(stream_id)->header = header;
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->set_backlog_size(new_size);
+}
 
-       // If there are clients we haven't sent anything to yet, we should give
-       // them the header, so push back into the SENDING_HEADER state.
-       for (map<int, Client>::iterator client_it = clients.begin();
-            client_it != clients.end();
-            ++client_it) {
-               Client *client = &client_it->second;
-               if (client->state == Client::SENDING_DATA &&
-                   client->bytes_sent == 0) {
-                       construct_header(client);
-               }
-       }
+void Server::set_prebuffering_bytes(int stream_index, size_t new_amount)
+{
+       MutexLock lock(&mutex);
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->prebuffering_bytes = new_amount;
 }
        
-void Server::set_mark_pool(const std::string &stream_id, MarkPool *mark_pool)
+void Server::set_encoding(int stream_index, Stream::Encoding encoding)
 {
        MutexLock lock(&mutex);
-       assert(clients.empty());
-       find_stream(stream_id)->mark_pool = mark_pool;
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->encoding = encoding;
 }
 
-void Server::add_data_deferred(const string &stream_id, const char *data, size_t bytes)
+void Server::set_src_encoding(int stream_index, Stream::Encoding encoding)
 {
-       MutexLock lock(&queued_data_mutex);
-       queued_data[stream_id].append(string(data, data + bytes));
+       MutexLock lock(&mutex);
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->src_encoding = encoding;
 }
-
-void Server::add_data(const string &stream_id, const char *data, ssize_t bytes)
+       
+void Server::set_header(int stream_index, const string &http_header, const string &stream_header)
 {
-       Stream *stream = find_stream(stream_id);
-       size_t pos = stream->bytes_received % stream->backlog_size;
-       stream->bytes_received += bytes;
-
-       if (pos + bytes > stream->backlog_size) {
-               ssize_t to_copy = stream->backlog_size - pos;
-               while (to_copy > 0) {
-                       int ret = pwrite(stream->data_fd, data, to_copy, pos);
-                       if (ret == -1 && errno == EINTR) {
-                               continue;
-                       }
-                       if (ret == -1) {
-                               perror("pwrite");
-                               // Dazed and confused, but trying to continue...
-                               break;
-                       }
-                       pos += ret;
-                       data += ret;
-                       to_copy -= ret;
-                       bytes -= ret;
-               }
-               pos = 0;
+       MutexLock lock(&mutex);
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->http_header = http_header;
+
+       if (stream_header != streams[stream_index]->stream_header) {
+               // We cannot start at any of the older starting points anymore,
+               // since they'd get the wrong header for the stream (not to mention
+               // that a changed header probably means the stream restarted,
+               // which means any client starting on the old one would probably
+               // stop playing properly at the change point). Next block
+               // should be a suitable starting point (if not, something is
+               // pretty strange), so it will fill up again soon enough.
+               streams[stream_index]->suitable_starting_points.clear();
        }
+       streams[stream_index]->stream_header = stream_header;
+}
+       
+void Server::set_pacing_rate(int stream_index, uint32_t pacing_rate)
+{
+       MutexLock lock(&mutex);
+       assert(clients.empty());
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->pacing_rate = pacing_rate;
+}
 
-       while (bytes > 0) {
-               int ret = pwrite(stream->data_fd, data, bytes, pos);
-               if (ret == -1 && errno == EINTR) {
-                       continue;
-               }
-               if (ret == -1) {
-                       perror("pwrite");
-                       // Dazed and confused, but trying to continue...
-                       break;
-               }
-               pos += ret;
-               data += ret;
-               bytes -= ret;
-       }
+void Server::add_gen204(const std::string &url, const std::string &allow_origin)
+{
+       MutexLock lock(&mutex);
+       assert(clients.empty());
+       ping_url_map[url] = allow_origin;
+}
 
-       stream->wake_up_all_clients();
+void Server::add_data_deferred(int stream_index, const char *data, size_t bytes, StreamStartSuitability suitable_for_stream_start)
+{
+       assert(stream_index >= 0 && stream_index < ssize_t(streams.size()));
+       streams[stream_index]->add_data_deferred(data, bytes, suitable_for_stream_start);
 }
 
 // See the .h file for postconditions after this function.     
@@ -291,7 +398,7 @@ read_request_again:
                        return;
                }
                if (ret == -1) {
-                       perror("read");
+                       log_perror("read");
                        close_client(client);
                        return;
                }
@@ -305,7 +412,7 @@ read_request_again:
        
                switch (status) {
                case RP_OUT_OF_SPACE:
-                       fprintf(stderr, "WARNING: fd %d sent overlong request!\n", client->sock);
+                       log(WARNING, "[%s] Client sent overlong request!", client->remote_addr.c_str());
                        close_client(client);
                        return;
                case RP_NOT_FINISHED_YET:
@@ -313,7 +420,7 @@ read_request_again:
                        // See if there's more data for us.
                        goto read_request_again;
                case RP_EXTRA_DATA:
-                       fprintf(stderr, "WARNING: fd %d had junk data after request!\n", client->sock);
+                       log(WARNING, "[%s] Junk data after request!", client->remote_addr.c_str());
                        close_client(client);
                        return;
                case RP_FINISHED:
@@ -325,22 +432,24 @@ read_request_again:
                int error_code = parse_request(client);
                if (error_code == 200) {
                        construct_header(client);
+               } else if (error_code == 204) {
+                       construct_204(client);
                } else {
                        construct_error(client, error_code);
                }
 
                // We've changed states, so fall through.
-               assert(client->state == Client::SENDING_ERROR ||
+               assert(client->state == Client::SENDING_SHORT_RESPONSE ||
                       client->state == Client::SENDING_HEADER);
        }
-       case Client::SENDING_ERROR:
+       case Client::SENDING_SHORT_RESPONSE:
        case Client::SENDING_HEADER: {
-sending_header_or_error_again:
+sending_header_or_short_response_again:
                int ret;
                do {
                        ret = write(client->sock,
-                                   client->header_or_error.data() + client->header_or_error_bytes_sent,
-                                   client->header_or_error.size() - client->header_or_error_bytes_sent);
+                                   client->header_or_short_response.data() + client->header_or_short_response_bytes_sent,
+                                   client->header_or_short_response.size() - client->header_or_short_response_bytes_sent);
                } while (ret == -1 && errno == EINTR);
 
                if (ret == -1 && errno == EAGAIN) {
@@ -353,64 +462,119 @@ sending_header_or_error_again:
 
                if (ret == -1) {
                        // Error! Postcondition #1.
-                       perror("write");
+                       log_perror("write");
                        close_client(client);
                        return;
                }
                
-               client->header_or_error_bytes_sent += ret;
-               assert(client->header_or_error_bytes_sent <= client->header_or_error.size());
+               client->header_or_short_response_bytes_sent += ret;
+               assert(client->header_or_short_response_bytes_sent <= client->header_or_short_response.size());
 
-               if (client->header_or_error_bytes_sent < client->header_or_error.size()) {
+               if (client->header_or_short_response_bytes_sent < client->header_or_short_response.size()) {
                        // We haven't sent all yet. Fine; go another round.
-                       goto sending_header_or_error_again;
+                       goto sending_header_or_short_response_again;
                }
 
                // We're done sending the header or error! Clear it to release some memory.
-               client->header_or_error.clear();
+               client->header_or_short_response.clear();
 
-               if (client->state == Client::SENDING_ERROR) {
+               if (client->state == Client::SENDING_SHORT_RESPONSE) {
                        // We're done sending the error, so now close.  
                        // This is postcondition #1.
                        close_client(client);
                        return;
                }
 
-               // Start sending from the end. In other words, we won't send any of the backlog,
-               // but we'll start sending immediately as we get data.
-               // This is postcondition #3.
+               Stream *stream = client->stream;
+               if (client->stream_pos == size_t(-2)) {
+                       // Start sending from the beginning of the backlog.
+                       client->stream_pos = min<size_t>(
+                           stream->bytes_received - stream->backlog_size,
+                           0);
+                       client->state = Client::SENDING_DATA;
+                       goto sending_data;
+               } else if (stream->prebuffering_bytes == 0) {
+                       // Start sending from the first keyframe we get. In other
+                       // words, we won't send any of the backlog, but we'll start
+                       // sending immediately as we get the next keyframe block.
+                       // Note that this is functionally identical to the next if branch,
+                       // except that we save a binary search.
+                       client->stream_pos = stream->bytes_received;
+                       client->state = Client::WAITING_FOR_KEYFRAME;
+               } else {
+                       // We're not going to send anything to the client before we have
+                       // N bytes. However, this wait might be boring; we can just as well
+                       // use it to send older data if we have it. We use lower_bound()
+                       // so that we are conservative and never add extra latency over just
+                       // waiting (assuming CBR or nearly so); otherwise, we could want e.g.
+                       // 100 kB prebuffer but end up sending a 10 MB GOP.
+                       deque<size_t>::const_iterator starting_point_it =
+                               lower_bound(stream->suitable_starting_points.begin(),
+                                           stream->suitable_starting_points.end(),
+                                           stream->bytes_received - stream->prebuffering_bytes);
+                       if (starting_point_it == stream->suitable_starting_points.end()) {
+                               // None found. Just put us at the end, and then wait for the
+                               // first keyframe to appear.
+                               client->stream_pos = stream->bytes_received;
+                               client->state = Client::WAITING_FOR_KEYFRAME;
+                       } else {
+                               client->stream_pos = *starting_point_it;
+                               client->state = Client::PREBUFFERING;
+                               goto prebuffering;
+                       }
+               }
+               // Fall through.
+       }
+       case Client::WAITING_FOR_KEYFRAME: {
+               Stream *stream = client->stream;
+               if (stream->suitable_starting_points.empty() ||
+                   client->stream_pos > stream->suitable_starting_points.back()) {
+                       // We haven't received a keyframe since this stream started waiting,
+                       // so keep on waiting for one.
+                       // This is postcondition #3.
+                       stream->put_client_to_sleep(client);
+                       return;
+               }
+               client->stream_pos = stream->suitable_starting_points.back();
+               client->state = Client::PREBUFFERING;
+               // Fall through.
+       }
+       case Client::PREBUFFERING: {
+prebuffering:
+               Stream *stream = client->stream;
+               size_t bytes_to_send = stream->bytes_received - client->stream_pos;
+               assert(bytes_to_send <= stream->backlog_size);
+               if (bytes_to_send < stream->prebuffering_bytes) {
+                       // We don't have enough bytes buffered to start this client yet.
+                       // This is postcondition #3.
+                       stream->put_client_to_sleep(client);
+                       return;
+               }
                client->state = Client::SENDING_DATA;
-               client->bytes_sent = client->stream->bytes_received;
-               client->stream->put_client_to_sleep(client);
-               return;
+               // Fall through.
        }
        case Client::SENDING_DATA: {
-sending_data_again:
-               // See if there's some data we've lost. Ideally, we should drop to a block boundary,
-               // but resync will be the mux's problem.
+sending_data:
+               skip_lost_data(client);
                Stream *stream = client->stream;
-               size_t bytes_to_send = stream->bytes_received - client->bytes_sent;
+
+sending_data_again:
+               size_t bytes_to_send = stream->bytes_received - client->stream_pos;
+               assert(bytes_to_send <= stream->backlog_size);
                if (bytes_to_send == 0) {
                        return;
                }
-               if (bytes_to_send > stream->backlog_size) {
-                       fprintf(stderr, "WARNING: fd %d lost %lld bytes, maybe too slow connection\n",
-                               client->sock,
-                               (long long int)(bytes_to_send - stream->backlog_size));
-                       client->bytes_sent = stream->bytes_received - stream->backlog_size;
-                       bytes_to_send = stream->backlog_size;
-               }
 
                // See if we need to split across the circular buffer.
                bool more_data = false;
-               if ((client->bytes_sent % stream->backlog_size) + bytes_to_send > stream->backlog_size) {
-                       bytes_to_send = stream->backlog_size - (client->bytes_sent % stream->backlog_size);
+               if ((client->stream_pos % stream->backlog_size) + bytes_to_send > stream->backlog_size) {
+                       bytes_to_send = stream->backlog_size - (client->stream_pos % stream->backlog_size);
                        more_data = true;
                }
 
                ssize_t ret;
                do {
-                       loff_t offset = client->bytes_sent % stream->backlog_size;
+                       off_t offset = client->stream_pos % stream->backlog_size;
                        ret = sendfile(client->sock, stream->data_fd, &offset, bytes_to_send);
                } while (ret == -1 && errno == EINTR);
 
@@ -422,17 +586,18 @@ sending_data_again:
                }
                if (ret == -1) {
                        // Error, close; postcondition #1.
-                       perror("sendfile");
+                       log_perror("sendfile");
                        close_client(client);
                        return;
                }
+               client->stream_pos += ret;
                client->bytes_sent += ret;
 
-               if (client->bytes_sent == stream->bytes_received) {
+               if (client->stream_pos == stream->bytes_received) {
                        // We don't have any more data for this client, so put it to sleep.
                        // This is postcondition #3.
                        stream->put_client_to_sleep(client);
-               } else if (more_data) {
+               } else if (more_data && size_t(ret) == bytes_to_send) {
                        goto sending_data_again;
                }
                break;
@@ -442,6 +607,23 @@ sending_data_again:
        }
 }
 
+// Skip data the client can no longer get (it lags by more than backlog_size) and record
+// the loss. Ideally we'd drop to a block boundary, but resync will be the mux's problem.
+void Server::skip_lost_data(Client *client)
+{
+       Stream *stream = client->stream;
+       if (stream == NULL) {  // No stream attached to this client; nothing to skip.
+               return;
+       }
+       size_t bytes_to_send = stream->bytes_received - client->stream_pos;  // How far the client is behind the stream.
+       if (bytes_to_send > stream->backlog_size) {  // Lagging by more than backlog_size.
+               size_t bytes_lost = bytes_to_send - stream->backlog_size;
+               client->stream_pos = stream->bytes_received - stream->backlog_size;  // Resume exactly backlog_size bytes behind the newest data.
+               client->bytes_lost += bytes_lost;
+               ++client->num_loss_events;  // One event per skip, however many bytes were lost.
+       }
+}
+
 int Server::parse_request(Client *client)
 {
        vector<string> lines = split_lines(client->request);
@@ -449,6 +631,18 @@ int Server::parse_request(Client *client)
                return 400;  // Bad request (empty).
        }
 
+       // Parse the headers, for logging purposes.
+       // TODO: Case-insensitivity.
+       multimap<string, string> headers = extract_headers(lines, client->remote_addr);
+       multimap<string, string>::const_iterator referer_it = headers.find("Referer");
+       if (referer_it != headers.end()) {
+               client->referer = referer_it->second;
+       }
+       multimap<string, string>::const_iterator user_agent_it = headers.find("User-Agent");
+       if (user_agent_it != headers.end()) {
+               client->user_agent = user_agent_it->second;
+       }
+
        vector<string> request_tokens = split_tokens(lines[0]);
        if (request_tokens.size() < 2) {
                return 400;  // Bad request (empty).
@@ -456,20 +650,35 @@ int Server::parse_request(Client *client)
        if (request_tokens[0] != "GET") {
                return 400;  // Should maybe be 405 instead?
        }
-       if (streams.count(request_tokens[1]) == 0) {
-               return 404;  // Not found.
-       }
 
-       client->stream_id = request_tokens[1];
-       client->stream = find_stream(client->stream_id);
-       if (client->stream->mark_pool != NULL) {
-               client->fwmark = client->stream->mark_pool->get_mark();
+       string url = request_tokens[1];
+       client->url = url;
+       if (url.find("?backlog") == url.size() - 8) {
+               client->stream_pos = -2;
+               url = url.substr(0, url.size() - 8);
        } else {
-               client->fwmark = 0;  // No mark.
+               client->stream_pos = -1;
        }
-       if (setsockopt(client->sock, SOL_SOCKET, SO_MARK, &client->fwmark, sizeof(client->fwmark)) == -1) {                          
-               if (client->fwmark != 0) {
-                       perror("setsockopt(SO_MARK)");
+
+       map<string, int>::const_iterator stream_url_map_it = stream_url_map.find(url);
+       if (stream_url_map_it == stream_url_map.end()) {
+               map<string, string>::const_iterator ping_url_map_it = ping_url_map.find(url);
+               if (ping_url_map_it == ping_url_map.end()) {
+                       return 404;  // Not found.
+               } else {
+                       return 204;  // No error.
+               }
+       }
+
+       Stream *stream = streams[stream_url_map_it->second];
+       if (stream->http_header.empty()) {
+               return 503;  // Service unavailable.
+       }
+
+       client->stream = stream;
+       if (setsockopt(client->sock, SOL_SOCKET, SO_MAX_PACING_RATE, &client->stream->pacing_rate, sizeof(client->stream->pacing_rate)) == -1) {
+               if (client->stream->pacing_rate != ~0U) {
+                       log_perror("setsockopt(SO_MAX_PACING_RATE)");
                }
        }
        client->request.clear();
@@ -479,18 +688,38 @@ int Server::parse_request(Client *client)
 
 void Server::construct_header(Client *client)
 {
-       client->header_or_error = find_stream(client->stream_id)->header;
+       Stream *stream = client->stream;
+       if (stream->encoding == Stream::STREAM_ENCODING_RAW) {  // Raw: HTTP header, blank line, then the stream header as-is.
+               client->header_or_short_response = stream->http_header +
+                       "\r\n" +
+                       stream->stream_header;
+       } else if (stream->encoding == Stream::STREAM_ENCODING_METACUBE) {  // Metacube: announce the encoding, then frame the stream header.
+               client->header_or_short_response = stream->http_header +
+                       "Content-encoding: metacube\r\n" +
+                       "\r\n";
+               if (!stream->stream_header.empty()) {  // Only a non-empty stream header gets a block header in front of it.
+                       metacube2_block_header hdr;
+                       memcpy(hdr.sync, METACUBE2_SYNC, sizeof(hdr.sync));
+                       hdr.size = htonl(stream->stream_header.size());  // Multi-byte fields are stored in network byte order.
+                       hdr.flags = htons(METACUBE_FLAGS_HEADER);
+                       hdr.csum = htons(metacube2_compute_crc(&hdr));  // Computed after size and flags have been filled in.
+                       client->header_or_short_response.append(
+                               string(reinterpret_cast<char *>(&hdr), sizeof(hdr)));
+               }
+               client->header_or_short_response.append(stream->stream_header);
+       } else {
+               assert(false);  // Neither of the two encodings handled above.
+       }
 
        // Switch states.
        client->state = Client::SENDING_HEADER;
 
        epoll_event ev;
        ev.events = EPOLLOUT | EPOLLET | EPOLLRDHUP;
-       ev.data.u64 = 0;  // Keep Valgrind happy.
-       ev.data.fd = client->sock;
+       ev.data.u64 = reinterpret_cast<uint64_t>(client);  // The event now carries the Client pointer instead of the fd.
 
        if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, client->sock, &ev) == -1) {
-               perror("epoll_ctl(EPOLL_CTL_MOD)");
+               log_perror("epoll_ctl(EPOLL_CTL_MOD)");
                exit(1);
        }
 }
@@ -500,18 +729,49 @@ void Server::construct_error(Client *client, int error_code)
        char error[256];
        snprintf(error, 256, "HTTP/1.0 %d Error\r\nContent-type: text/plain\r\n\r\nSomething went wrong. Sorry.\r\n",
                error_code);
-       client->header_or_error = error;
+       client->header_or_short_response = error;
+
+       // Switch states.
+       client->state = Client::SENDING_SHORT_RESPONSE;
+
+       epoll_event ev;
+       ev.events = EPOLLOUT | EPOLLET | EPOLLRDHUP;
+       ev.data.u64 = reinterpret_cast<uint64_t>(client);
+
+       if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, client->sock, &ev) == -1) {
+               log_perror("epoll_ctl(EPOLL_CTL_MOD)");
+               exit(1);
+       }
+}
+
+// Queue a "204 No Content" short response for a ping URL. A non-empty ping_url_map
+// value is sent as Access-Control-Allow-Origin; then switch the client to EPOLLOUT.
+void Server::construct_204(Client *client)
+{
+       map<string, string>::const_iterator ping_url_map_it = ping_url_map.find(client->url);
+       if (ping_url_map_it == ping_url_map.end() && client->url.size() >= 8) {
+               // client->url keeps the 8-byte "?backlog" suffix that parse_request() strips before its lookup.
+               ping_url_map_it = ping_url_map.find(client->url.substr(0, client->url.size() - 8));
+       }
+       assert(ping_url_map_it != ping_url_map.end());
+
+       // Assembled as a std::string: a fixed 256-byte snprintf() buffer would truncate long origins.
+       client->header_or_short_response = "HTTP/1.0 204 No Content\r\n";
+       if (!ping_url_map_it->second.empty()) {
+               client->header_or_short_response +=
+                       "Access-Control-Allow-Origin: " + ping_url_map_it->second + "\r\n";
+       }
+       client->header_or_short_response += "\r\n";
 
        // Switch states.
-       client->state = Client::SENDING_ERROR;
+       client->state = Client::SENDING_SHORT_RESPONSE;
 
        epoll_event ev;
        ev.events = EPOLLOUT | EPOLLET | EPOLLRDHUP;
-       ev.data.u64 = 0;  // Keep Valgrind happy.
-       ev.data.fd = client->sock;
+       ev.data.u64 = reinterpret_cast<uint64_t>(client);
 
        if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, client->sock, &ev) == -1) {
-               perror("epoll_ctl(EPOLL_CTL_MOD)");
+               log_perror("epoll_ctl(EPOLL_CTL_MOD)");
                exit(1);
        }
 }
@@ -526,7 +786,7 @@ void delete_from(vector<T> *v, T elem)
 void Server::close_client(Client *client)
 {
        if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->sock, NULL) == -1) {
-               perror("epoll_ctl(EPOLL_CTL_DEL)");
+               log_perror("epoll_ctl(EPOLL_CTL_DEL)");
                exit(1);
        }
 
@@ -534,45 +794,29 @@ void Server::close_client(Client *client)
        if (client->stream != NULL) {
                delete_from(&client->stream->sleeping_clients, client);
                delete_from(&client->stream->to_process, client);
-               if (client->stream->mark_pool != NULL) {
-                       int fwmark = client->fwmark;
-                       client->stream->mark_pool->release_mark(fwmark);
-               }
        }
 
-       // Bye-bye!
-       int ret;
-       do {
-               ret = close(client->sock);
-       } while (ret == -1 && errno == EINTR);
+       // Log to access_log.
+       access_log->write(client->get_stats());
 
-       if (ret == -1) {
-               perror("close");
-       }
+       // Bye-bye!
+       safe_close(client->sock);
 
        clients.erase(client->sock);
 }
        
-Stream *Server::find_stream(const string &stream_id)
-{
-       map<string, Stream *>::iterator it = streams.find(stream_id);
-       assert(it != streams.end());
-       return it->second;
-}
-
 void Server::process_queued_data()
 {
-       MutexLock lock(&queued_data_mutex);
+       {  // Inner scope: the MutexLock below only lives until the matching brace.
+               MutexLock lock(&queued_clients_mutex);
 
-       for (size_t i = 0; i < queued_add_clients.size(); ++i) {
-               add_client(queued_add_clients[i]);
+               for (size_t i = 0; i < queued_add_clients.size(); ++i) {
+                       add_client(queued_add_clients[i]);
+               }
+               queued_add_clients.clear();  // Every queued client has been handed to add_client().
        }
-       queued_add_clients.clear();     
-       
-       for (map<string, string>::iterator queued_it = queued_data.begin();
-            queued_it != queued_data.end();
-            ++queued_it) {
-               add_data(queued_it->first, queued_it->second.data(), queued_it->second.size());
+
+       for (size_t i = 0; i < streams.size(); ++i) {   
+               streams[i]->process_queued_data();  // Called after the lock scope above has ended.
        }
-       queued_data.clear();
 }