git.sesse.net Git - cubemap/blobdiff - httpinput.cpp
Make the HTTP inputs time out after 30 seconds of no activity.
[cubemap] / httpinput.cpp
index 354e4b9885aa0d9be0622c4f85d1ec23e67be5d4..7f96970b678fb75c68a31746a39087a1a41e269d 100644 (file)
@@ -1,52 +1,73 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
 #include <assert.h>
-#include <arpa/inet.h>
-#include <sys/socket.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
+#include <errno.h>
 #include <netdb.h>
+#include <netinet/in.h>
 #include <poll.h>
-#include <signal.h>
-#include <errno.h>
-#include <vector>
-#include <string>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <time.h>
+#include <unistd.h>
 #include <map>
+#include <string>
+#include <utility>
+#include <vector>
 
+#include "httpinput.h"
+#include "log.h"
 #include "metacube.h"
 #include "mutexlock.h"
-#include "httpinput.h"
-#include "server.h"
-#include "serverpool.h"
 #include "parse.h"
-#include "version.h"
+#include "serverpool.h"
 #include "state.pb.h"
+#include "util.h"
+#include "version.h"
 
 using namespace std;
 
 extern ServerPool *servers;
-         
-HTTPInput::HTTPInput(const string &stream_id, const string &url)
+
+namespace {
+
+// Compute b-a. The result can be negative (tv_sec < 0) if b is earlier than a.
+timespec clock_diff(const timespec &a, const timespec &b)
+{
+       timespec ret;
+       ret.tv_sec = b.tv_sec - a.tv_sec;
+       ret.tv_nsec = b.tv_nsec - a.tv_nsec;
+       if (ret.tv_nsec < 0) {  // Borrow one second so that tv_nsec ends up non-negative.
+               ret.tv_sec--;
+               ret.tv_nsec += 1000000000;
+       }
+       assert(ret.tv_nsec >= 0);  // Holds as long as both inputs have tv_nsec in [0, 1e9).
+       return ret;
+}
+
+}  // namespace
+
+HTTPInput::HTTPInput(const string &url)  // Creates a not-yet-connected input (state NOT_CONNECTED, no socket) for the given URL.
        : state(NOT_CONNECTED),
-         stream_id(stream_id),
          url(url),
          has_metacube_header(false),
          sock(-1)
 {
+       pthread_mutex_init(&stats_mutex, NULL);  // stats_mutex is what get_stats() and the stats updates lock.
+       stats.url = url;
+       stats.bytes_received = 0;
+       stats.data_bytes_received = 0;
+       stats.connect_time = -1;  // -1 is also what close_socket() stores; presumably means "not connected".
 }
 
 HTTPInput::HTTPInput(const InputProto &serialized)
        : state(State(serialized.state())),
-         stream_id(serialized.stream_id()),
          url(serialized.url()),
          request(serialized.request()),
          request_bytes_sent(serialized.request_bytes_sent()),
          response(serialized.response()),
          http_header(serialized.http_header()),
+         stream_header(serialized.stream_header()),
          has_metacube_header(serialized.has_metacube_header()),
          sock(serialized.sock())
 {
@@ -55,33 +76,51 @@ HTTPInput::HTTPInput(const InputProto &serialized)
 
        string protocol;
        parse_url(url, &protocol, &host, &port, &path);  // Don't care if it fails.
+
+       // Older versions stored the extra \r\n in the HTTP header.
+       // Strip it if we find it.
+       if (http_header.size() >= 4 &&
+           memcmp(http_header.data() + http_header.size() - 4, "\r\n\r\n", 4) == 0) {
+               http_header.resize(http_header.size() - 2);
+       }
+
+       pthread_mutex_init(&stats_mutex, NULL);
+       stats.url = url;
+       stats.bytes_received = serialized.bytes_received();
+       stats.data_bytes_received = serialized.data_bytes_received();
+       if (serialized.has_connect_time()) {
+               stats.connect_time = serialized.connect_time();
+       } else {
+               stats.connect_time = time(NULL);
+       }
 }
 
 void HTTPInput::close_socket()
 {
-       int ret;
-       do {
-               ret = close(sock);
-       } while (ret == -1 && errno == EINTR);
-
-       if (ret == -1) {
-               perror("close()");
+       if (sock != -1) {
+               safe_close(sock);
        }
+
+       MutexLock lock(&stats_mutex);
+       stats.connect_time = -1;
 }
 
 InputProto HTTPInput::serialize() const
 {
        InputProto serialized;
        serialized.set_state(state);
-       serialized.set_stream_id(stream_id);
        serialized.set_url(url);
        serialized.set_request(request);
        serialized.set_request_bytes_sent(request_bytes_sent);
        serialized.set_response(response);
        serialized.set_http_header(http_header);
+       serialized.set_stream_header(stream_header);  // Read back by the InputProto constructor, so the stream header survives serialization.
        serialized.set_pending_data(string(pending_data.begin(), pending_data.end()));
        serialized.set_has_metacube_header(has_metacube_header);
        serialized.set_sock(sock);
+       serialized.set_bytes_received(stats.bytes_received);  // NOTE(review): stats is read here without taking stats_mutex; confirm nothing else updates it while serializing.
+       serialized.set_data_bytes_received(stats.data_bytes_received);
+       serialized.set_connect_time(stats.connect_time);  // May be -1, which is what close_socket() and the URL constructor store.
        return serialized;
 }
 
@@ -89,37 +128,84 @@ int HTTPInput::lookup_and_connect(const string &host, const string &port)
 {
+       // Resolve host/port and try each resulting address in turn. Returns a connected,
+       // non-blocking socket, or -1 if no address could be connected to or we were
+       // asked to stop in the meantime.
        addrinfo *ai;
        int err = getaddrinfo(host.c_str(), port.c_str(), NULL, &ai);
-       if (err == -1) {
-               fprintf(stderr, "WARNING: Lookup of '%s' failed (%s).\n",
-                       host.c_str(), gai_strerror(err));
-               freeaddrinfo(ai);
+       if (err != 0) {
+               log(WARNING, "[%s] Lookup of '%s' failed (%s).",
+                       url.c_str(), host.c_str(), gai_strerror(err));
                return -1;
        }
 
+       addrinfo *base_ai = ai;
+
        // Connect to everything in turn until we have a socket.
-       while (ai && !should_stop) {
+       for ( ; ai && !should_stop(); ai = ai->ai_next) {
                int sock = socket(ai->ai_family, SOCK_STREAM, IPPROTO_TCP);
                if (sock == -1) {
                        // Could be e.g. EPROTONOSUPPORT. The show must go on.
                        continue;
                }
 
+               // Now do a non-blocking connect. This is important because we want to be able to be
+               // woken up, even though it's rather cumbersome.
+
+               // Set the socket as nonblocking.
+               int one = 1;
+               if (ioctl(sock, FIONBIO, &one) == -1) {
+                       log_perror("ioctl(FIONBIO)");
+                       safe_close(sock);
+                       freeaddrinfo(base_ai);  // Don't leak the lookup result on this early exit.
+                       return -1;
+               }
+
+               // Do a non-blocking connect.
                do {
                        err = connect(sock, ai->ai_addr, ai->ai_addrlen);
                } while (err == -1 && errno == EINTR);
 
-               if (err != -1) {
-                       freeaddrinfo(ai);
+               if (err == -1 && errno != EINPROGRESS) {
+                       log_perror("connect");
+                       safe_close(sock);
+                       continue;
+               }
+
+               // Wait for the connect to complete, or an error to happen.
+               for ( ;; ) {
+                       bool complete = wait_for_activity(sock, POLLIN | POLLOUT, NULL);
+                       if (should_stop()) {
+                               safe_close(sock);
+                               freeaddrinfo(base_ai);  // Same here: every exit has to release the address list.
+                               return -1;
+                       }
+                       if (complete) {
+                               break;
+                       }
+               }
+
+               // Check whether it ended in an error or not.
+               socklen_t err_size = sizeof(err);
+               if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &err_size) == -1) {
+                       log_perror("getsockopt");
+                       safe_close(sock);
+                       continue;
+               }
+
+               errno = err;  // So that strerror(errno) in the final warning describes this attempt.
+
+               if (err == 0) {
+                       // Successful connect.
+                       freeaddrinfo(base_ai);
                        return sock;
                }
 
-               ai = ai->ai_next;
+               safe_close(sock);
        }
 
        // Give the last one as error.
-       fprintf(stderr, "WARNING: Connect to '%s' failed (%s)\n",
-               host.c_str(), strerror(errno));
-       freeaddrinfo(ai);
+       log(WARNING, "[%s] Connect to '%s' failed (%s)",
+               url.c_str(), host.c_str(), strerror(errno));
+       freeaddrinfo(base_ai);
        return -1;
 }
        
@@ -127,21 +208,21 @@ bool HTTPInput::parse_response(const std::string &request)
 {
        vector<string> lines = split_lines(response);
        if (lines.empty()) {
-               fprintf(stderr, "WARNING: Empty HTTP response from input.\n");
+               log(WARNING, "[%s] Empty HTTP response from input.", url.c_str());
                return false;
        }
 
        vector<string> first_line_tokens = split_tokens(lines[0]);
        if (first_line_tokens.size() < 2) {
-               fprintf(stderr, "WARNING: Malformed response line '%s' from input.\n",
-                       lines[0].c_str());
+               log(WARNING, "[%s] Malformed response line '%s' from input.",
+                       url.c_str(), lines[0].c_str());
                return false;
        }
 
        int response = atoi(first_line_tokens[1].c_str());
        if (response != 200) {
-               fprintf(stderr, "WARNING: Non-200 response '%s' from input.\n",
-                       lines[0].c_str());
+               log(WARNING, "[%s] Non-200 response '%s' from input.",
+                       url.c_str(), lines[0].c_str());
                return false;
        }
 
@@ -149,8 +230,8 @@ bool HTTPInput::parse_response(const std::string &request)
        for (size_t i = 1; i < lines.size(); ++i) {
                size_t split = lines[i].find(":");
                if (split == string::npos) {
-                       fprintf(stderr, "WARNING: Ignoring malformed HTTP response line '%s'\n",
-                               lines[i].c_str());
+                       log(WARNING, "[%s] Ignoring malformed HTTP response line '%s'",
+                               url.c_str(), lines[i].c_str());
                        continue;
                }
 
@@ -188,6 +269,11 @@ bool HTTPInput::parse_response(const std::string &request)
                }
        }
 
+       // Set “Connection: close”.
+       // TODO: Make case-insensitive.
+       parameters.erase("Connection");
+       parameters.insert(make_pair("Connection", "close"));
+
        // Construct the new HTTP header.
        http_header = "HTTP/1.0 200 OK\r\n";
        for (multimap<string, string>::iterator it = parameters.begin();
@@ -195,30 +281,54 @@ bool HTTPInput::parse_response(const std::string &request)
             ++it) {
                http_header.append(it->first + ": " + it->second + "\r\n");
        }
-       http_header.append("\r\n");     
-       servers->set_header(stream_id, http_header);
+
+       for (size_t i = 0; i < stream_indices.size(); ++i) {
+               servers->set_header(stream_indices[i], http_header, stream_header);
+       }
 
        return true;
 }
 
 void HTTPInput::do_work()
 {
-       while (!should_stop) {
+       timespec last_activity;
+
+       // TODO: Make the timeout persist across restarts.
+       if (state == SENDING_REQUEST || state == RECEIVING_HEADER || state == RECEIVING_DATA) {
+               int err = clock_gettime(CLOCK_MONOTONIC, &last_activity);
+               assert(err != -1);
+       }
+
+       while (!should_stop()) {
                if (state == SENDING_REQUEST || state == RECEIVING_HEADER || state == RECEIVING_DATA) {
-                       // Since we are non-blocking, we need to wait for the right state first.
-                       // Wait up to 50 ms, then check should_stop.
-                       pollfd pfd;
-                       pfd.fd = sock;
-                       pfd.events = (state == SENDING_REQUEST) ? POLLOUT : POLLIN;
-                       pfd.events |= POLLRDHUP;
-
-                       int nfds = poll(&pfd, 1, 50);
-                       if (nfds == 0 || (nfds == -1 && errno == EINTR)) {
+                       // Give the socket 30 seconds since last activity before we time out.
+                       static const int timeout_secs = 30;
+
+                       timespec now;
+                       int err = clock_gettime(CLOCK_MONOTONIC, &now);
+                       assert(err != -1);
+
+                       timespec elapsed = clock_diff(last_activity, now);
+                       if (elapsed.tv_sec >= timeout_secs) {
+                               // Timeout!
+                               log(ERROR, "[%s] Timeout after %d seconds, closing.", url.c_str(), elapsed.tv_sec);
+                               state = CLOSING_SOCKET;
                                continue;
                        }
-                       if (nfds == -1) {
-                               perror("poll");
-                               state = CLOSING_SOCKET;
+
+                       // Basically calculate (30 - (now - last_activity)) = (30 + (last_activity - now)).
+                       // Add a second of slack to account for differences between clocks.
+                       timespec timeout = clock_diff(now, last_activity);
+                       timeout.tv_sec += timeout_secs + 1;
+                       assert(timeout.tv_sec > 0 || (timeout.tv_sec >= 0 && timeout.tv_nsec > 0));
+
+                       bool activity = wait_for_activity(sock, (state == SENDING_REQUEST) ? POLLOUT : POLLIN, &timeout);
+                       if (activity) {
+                               err = clock_gettime(CLOCK_MONOTONIC, &last_activity);
+                               assert(err != -1);
+                       } else {
+                               // OK. Most likely, should_stop was set, or we have timed out.
+                               continue;
                        }
                }
 
@@ -228,11 +338,15 @@ void HTTPInput::do_work()
                        request_bytes_sent = 0;
                        response.clear();
                        pending_data.clear();
+                       has_metacube_header = false;
+                       for (size_t i = 0; i < stream_indices.size(); ++i) {
+                               servers->set_header(stream_indices[i], "", "");
+                       }
 
                        {
                                string protocol;  // Thrown away.
                                if (!parse_url(url, &protocol, &host, &port, &path)) {
-                                       fprintf(stderr, "Failed to parse URL '%s'\n", url.c_str());
+                                       log(WARNING, "[%s] Failed to parse URL '%s'", url.c_str(), url.c_str());
                                        break;
                                }
                        }
@@ -242,13 +356,17 @@ void HTTPInput::do_work()
                                // Yay, successful connect. Try to set it as nonblocking.
                                int one = 1;
                                if (ioctl(sock, FIONBIO, &one) == -1) {
-                                       perror("ioctl(FIONBIO)");
+                                       log_perror("ioctl(FIONBIO)");
                                        state = CLOSING_SOCKET;
                                } else {
                                        state = SENDING_REQUEST;
                                        request = "GET " + path + " HTTP/1.0\r\nUser-Agent: cubemap\r\n\r\n";
                                        request_bytes_sent = 0;
                                }
+
+                               MutexLock lock(&stats_mutex);
+                               stats.connect_time = time(NULL);
+                               clock_gettime(CLOCK_MONOTONIC, &last_activity);
                        }
                        break;
                case SENDING_REQUEST: {
@@ -260,7 +378,7 @@ void HTTPInput::do_work()
                        } while (ret == -1 && errno == EINTR);
 
                        if (ret == -1) {
-                               perror("write");
+                               log_perror("write");
                                state = CLOSING_SOCKET;
                                continue;
                        }
@@ -282,14 +400,15 @@ void HTTPInput::do_work()
                        } while (ret == -1 && errno == EINTR);
 
                        if (ret == -1) {
-                               perror("read");
+                               log_perror("read");
                                state = CLOSING_SOCKET;
                                continue;
                        }
 
                        if (ret == 0) {
                                // This really shouldn't happen...
-                               fprintf(stderr, "Socket unexpectedly closed while reading header\n");
+                               log(ERROR, "[%s] Socket unexpectedly closed while reading header",
+                                          url.c_str());
                                state = CLOSING_SOCKET;
                                continue;
                        }
@@ -297,7 +416,7 @@ void HTTPInput::do_work()
                        RequestParseStatus status = wait_for_double_newline(&response, buf, ret);
                        
                        if (status == RP_OUT_OF_SPACE) {
-                               fprintf(stderr, "WARNING: fd %d sent overlong response!\n", sock);
+                               log(WARNING, "[%s] Sever sent overlong HTTP response!", url.c_str());
                                state = CLOSING_SOCKET;
                                continue;
                        } else if (status == RP_NOT_FINISHED_YET) {
@@ -311,7 +430,7 @@ void HTTPInput::do_work()
                                char *ptr = static_cast<char *>(
                                        memmem(response.data(), response.size(), "\r\n\r\n", 4));
                                assert(ptr != NULL);
-                               extra_data = string(ptr, &response[0] + response.size());
+                               extra_data = string(ptr + 4, &response[0] + response.size());
                                response.resize(ptr - response.data());
                        }
 
@@ -324,6 +443,8 @@ void HTTPInput::do_work()
                                process_data(&extra_data[0], extra_data.size());
                        }
 
+                       log(INFO, "[%s] Connected to '%s', receiving data.",
+                                  url.c_str(), url.c_str());
                        state = RECEIVING_DATA;
                        break;
                }
@@ -336,14 +457,15 @@ void HTTPInput::do_work()
                        } while (ret == -1 && errno == EINTR);
 
                        if (ret == -1) {
-                               perror("read");
+                               log_perror("read");
                                state = CLOSING_SOCKET;
                                continue;
                        }
 
                        if (ret == 0) {
                                // This really shouldn't happen...
-                               fprintf(stderr, "Socket unexpectedly closed while reading header\n");
+                               log(ERROR, "[%s] Socket unexpectedly closed while reading data",
+                                          url.c_str());
                                state = CLOSING_SOCKET;
                                continue;
                        }
@@ -352,15 +474,7 @@ void HTTPInput::do_work()
                        break;
                }
                case CLOSING_SOCKET: {
-                       int err;
-                       do {
-                               err = close(sock);
-                       } while (err == -1 && errno == EINTR);
-
-                       if (err == -1) {
-                               perror("close");
-                       }
-
+                       close_socket();
                        state = NOT_CONNECTED;
                        break;
                }
@@ -371,9 +485,12 @@ void HTTPInput::do_work()
                // If we are still in NOT_CONNECTED, either something went wrong,
                // or the connection just got closed.
                // The earlier steps have already given the error message, if any.
-               if (state == NOT_CONNECTED && !should_stop) {
-                       fprintf(stderr, "Waiting 0.2 second and restarting...\n");
-                       usleep(200000);
+               if (state == NOT_CONNECTED && !should_stop()) {
+                       log(INFO, "[%s] Waiting 0.2 second and restarting...", url.c_str());
+                       timespec timeout_ts;
+                       timeout_ts.tv_sec = 0;
+                       timeout_ts.tv_nsec = 200000000;
+                       wait_for_wakeup(&timeout_ts);
                }
        }
 }
@@ -381,6 +498,10 @@ void HTTPInput::do_work()
 void HTTPInput::process_data(char *ptr, size_t bytes)
 {
        pending_data.insert(pending_data.end(), ptr, ptr + bytes);
+       {
+               MutexLock mutex(&stats_mutex);
+               stats.bytes_received += bytes;
+       }
 
        for ( ;; ) {
                // If we don't have enough data (yet) for even the Metacube header, just return.
@@ -419,18 +540,37 @@ void HTTPInput::process_data(char *ptr, size_t bytes)
                uint32_t size = ntohl(hdr->size);
                uint32_t flags = ntohl(hdr->flags);
 
+               if (size > 262144) {
+                       log(WARNING, "[%s] Metacube block of %d bytes (flags=%x); corrupted header?",
+                               url.c_str(), size, flags);
+               }
+
                // See if we have the entire block. If not, wait for more data.
                if (pending_data.size() < sizeof(metacube_block_header) + size) {
                        return;
                }
 
-               // Send this block on to the data.
+               // Send this block on to the servers.
+               {
+                       MutexLock lock(&stats_mutex);
+                       stats.data_bytes_received += size;
+               }
                char *inner_data = pending_data.data() + sizeof(metacube_block_header);
                if (flags & METACUBE_FLAGS_HEADER) {
-                       string header(inner_data, inner_data + size);
-                       servers->set_header(stream_id, http_header + header);
-               } else { 
-                       servers->add_data(stream_id, inner_data, size);
+                       stream_header = string(inner_data, inner_data + size);
+                       for (size_t i = 0; i < stream_indices.size(); ++i) {
+                               servers->set_header(stream_indices[i], http_header, stream_header);
+                       }
+               } else {
+                       StreamStartSuitability suitable_for_stream_start;
+                       if (flags & METACUBE_FLAGS_NOT_SUITABLE_FOR_STREAM_START) {
+                               suitable_for_stream_start = NOT_SUITABLE_FOR_STREAM_START;
+                       } else {
+                               suitable_for_stream_start = SUITABLE_FOR_STREAM_START;
+                       }
+                       for (size_t i = 0; i < stream_indices.size(); ++i) {
+                               servers->add_data(stream_indices[i], inner_data, size, suitable_for_stream_start);
+                       }
                }
 
                // Consume the block. This isn't the most efficient way of dealing with things
@@ -446,8 +586,20 @@ void HTTPInput::drop_pending_data(size_t num_bytes)
        if (num_bytes == 0) {
                return;
        }
-       fprintf(stderr, "Warning: Dropping %lld junk bytes from stream, maybe it is not a Metacube stream?\n",
-               (long long)num_bytes);
+       log(WARNING, "[%s] Dropping %lld junk bytes from stream, maybe it is not a Metacube stream?",
+               url.c_str(), (long long)num_bytes);
+       assert(pending_data.size() >= num_bytes);
        pending_data.erase(pending_data.begin(), pending_data.begin() + num_bytes);
 }
 
+void HTTPInput::add_destination(int stream_index)  // Adds one more stream that this input feeds.
+{
+       stream_indices.push_back(stream_index);  // stream_indices is what the set_header()/add_data() loops iterate over.
+       servers->set_header(stream_index, http_header, stream_header);  // Hand the new stream whatever headers we currently hold.
+}
+
+InputStats HTTPInput::get_stats() const
+{
+       MutexLock lock(&stats_mutex);  // Same mutex the stats updates take; presumably a scoped lock released on return.
+       return stats;  // Returned by value, so the caller gets its own copy.
+}