git.sesse.net Git - cubemap/blobdiff - input.cpp
Parse the HTTP header (more) properly, and send the headers on to any connecting...
[cubemap] / input.cpp
index 48465664b56bf7429f1023d44a4ec9d9b7b81f30..47c3c0193472d8cfa56747fd2461cdf5595632cd 100644 (file)
--- a/input.cpp
+++ b/input.cpp
@@ -3,12 +3,14 @@
 #include <stdint.h>
 #include <assert.h>
 #include <arpa/inet.h>
-#include <curl/curl.h>
 #include <sys/socket.h>
 #include <pthread.h>
 #include <sys/types.h>
 #include <sys/ioctl.h>
-#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <poll.h>
 #include <errno.h>
 #include <vector>
 #include <string>
 #include "mutexlock.h"
 #include "input.h"
 #include "server.h"
+#include "serverpool.h"
+#include "parse.h"
+#include "state.pb.h"
 
 using namespace std;
 
-extern Server *servers;
+extern ServerPool *servers;
+         
+// Extremely rudimentary URL parsing.
+//
+// Splits a URL of the form "http://host[:port][/path]" into its parts.
+// Only the "http://" scheme is accepted.
+//
+// On success, returns true and fills in:
+//   *host - everything between the scheme and the first ':' or '/'.
+//   *port - the text after the colon, or the service name "http" if the
+//           URL gives no port.
+//   *path - the rest of the URL, always starting with a slash
+//           ("/" if the URL has no path at all).
+//
+// Returns false, leaving the outputs untouched, if the URL does not start
+// with "http://".
+bool parse_url(const string &url, string *host, string *port, string *path)
+{
+       if (url.find("http://") != 0) {
+               return false;
+       }
+
+       string rest = url.substr(strlen("http://"));
+       size_t split = rest.find_first_of(":/");
+       if (split == string::npos) {
+               // http://foo
+               *host = rest;
+               *port = "http";
+               *path = "/";
+               return true;
+       }
+
+       *host = rest.substr(0, split);
+       char ch = rest[split];  // Colon or slash.
+
+       if (ch == '/') {
+               // http://foo/bar
+               // Keep the slash we split on; the path goes verbatim into the
+               // request line, so it must start with one.
+               *port = "http";
+               *path = rest.substr(split);
+               return true;
+       }
+
+       // Parse the port.
+       rest = rest.substr(split + 1);
+       split = rest.find_first_of('/');
+       if (split == string::npos) {
+               // http://foo:1234
+               *port = rest;
+               *path = "/";
+       } else {
+               // http://foo:1234/bar
+               *port = rest.substr(0, split);
+               *path = rest.substr(split);
+       }
+       return true;
+}
 
+// Creates an input that will feed the stream <stream_id> from <url>.
+// Nothing is connected here: the object starts out in NOT_CONNECTED with
+// no socket (sock == -1).
 Input::Input(const string &stream_id, const string &url)
-       : stream_id(stream_id),
+       : state(NOT_CONNECTED),
+         stream_id(stream_id),
          url(url),
-         has_metacube_header(false)
+         // Zeroed so that serialize() never reads an indeterminate value,
+         // even if it is called before any request has been set up.
+         request_bytes_sent(0),
+         has_metacube_header(false),
+         sock(-1)
+{
+}
+
+// Re-creates an input from the state captured by serialize().
+// host, port and path are not stored in the InputProto, so they are
+// derived again from the URL.
+Input::Input(const InputProto &serialized)
+       : state(State(serialized.state())),
+         stream_id(serialized.stream_id()),
+         url(serialized.url()),
+         request(serialized.request()),
+         request_bytes_sent(serialized.request_bytes_sent()),
+         response(serialized.response()),
+         http_header(serialized.http_header()),
+         has_metacube_header(serialized.has_metacube_header()),
+         sock(serialized.sock())
 {
+       // Copy with assign() rather than resize() + memcpy(&pending_data[0], ...):
+       // taking the address of element 0 is undefined when there is no
+       // pending data at all.
+       const string &pending = serialized.pending_data();
+       pending_data.assign(pending.begin(), pending.end());
+
+       parse_url(url, &host, &port, &path);  // Don't care if it fails.
+}
+
+// Captures the state of this input in an InputProto, the same message the
+// Input(const InputProto &) constructor reads back. host, port and path
+// are not stored; that constructor re-derives them from the URL.
+InputProto Input::serialize() const
+{
+       InputProto proto;
+
+       // Which stream this is, and where the connection stands.
+       proto.set_stream_id(stream_id);
+       proto.set_url(url);
+       proto.set_state(state);
+       proto.set_sock(sock);
+
+       // Progress of the HTTP exchange.
+       proto.set_request(request);
+       proto.set_request_bytes_sent(request_bytes_sent);
+       proto.set_response(response);
+       proto.set_http_header(http_header);
+
+       // Contents of the pending_data buffer, copied into a string.
+       const string buffered(pending_data.begin(), pending_data.end());
+       proto.set_pending_data(buffered);
+       proto.set_has_metacube_header(has_metacube_header);
+
+       return proto;
 }
 
 void Input::run()
@@ -58,33 +141,296 @@ void *Input::do_work_thunk(void *arg)
        return NULL;
 }
 
-void Input::do_work()
+// Resolves <host>:<port> and tries to connect to each returned address in
+// turn, stopping at the first one that succeeds.
+//
+// Returns the connected socket, or -1 (after printing a warning) if the
+// lookup failed or no address could be connected to. The loop also gives
+// up early if should_stop becomes set.
+int Input::lookup_and_connect(const string &host, const string &port)
 {
-       CURL *curl = curl_easy_init();
+       addrinfo *base_ai = NULL;
+       int err = getaddrinfo(host.c_str(), port.c_str(), NULL, &base_ai);
+       if (err != 0) {
+               // getaddrinfo() reports failure with a nonzero EAI_* code (not -1),
+               // and in that case there is no result list to free.
+               fprintf(stderr, "WARNING: Lookup of '%s' failed (%s).\n",
+                       host.c_str(), gai_strerror(err));
+               return -1;
+       }
 
-       while (!should_stop) {
-               curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
-               curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &Input::curl_callback_thunk);
-               curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
-               curl_easy_perform(curl);
-               printf("Transfer ended, waiting 0.2 seconds and restarting...\n");
-               usleep(200000);
+       // Connect to everything in turn until we have a socket.
+       // We walk the list with a separate cursor so that base_ai still points
+       // at the head when it is time to free the whole list.
+       int last_errno = 0;
+       for (addrinfo *ai = base_ai; ai != NULL && !should_stop; ai = ai->ai_next) {
+               int fd = socket(ai->ai_family, SOCK_STREAM, IPPROTO_TCP);
+               if (fd == -1) {
+                       // Could be e.g. EPROTONOSUPPORT. The show must go on
+                       // (with the next address; the for loop advances the cursor).
+                       last_errno = errno;
+                       continue;
+               }
+
+               do {
+                       err = connect(fd, ai->ai_addr, ai->ai_addrlen);
+               } while (err == -1 && errno == EINTR);
+
+               if (err != -1) {
+                       freeaddrinfo(base_ai);
+                       return fd;
+               }
+
+               // Save the reason before close() can overwrite errno, and release
+               // the socket so that failed attempts do not leak descriptors.
+               last_errno = errno;
+               close(fd);
        }
-}
 
-size_t Input::curl_callback_thunk(char *ptr, size_t size, size_t nmemb, void *userdata)
+       // Give the last one as error.
+       fprintf(stderr, "WARNING: Connect to '%s' failed (%s)\n",
+               host.c_str(), strerror(last_errno));
+       freeaddrinfo(base_ai);
+       return -1;
+}
+       
+// Parses the HTTP response header received from the input, and builds the
+// header that is passed on to our own servers.
+//
+// <request> holds the raw response header text to parse (the parameter
+// name is historical; the caller passes the response it has collected).
+//
+// Returns false, after printing a warning, if the text is empty, the
+// status line has fewer than two tokens, or the status is not 200.
+// On success, stores the rebuilt header in http_header, hands it to
+// servers->set_header() for this stream, and returns true.
+bool Input::parse_response(const std::string &request)
 {
-       Input *input = static_cast<Input *>(userdata);
-       if (input->should_stop) {
-               return 0;
+       // Parse the argument we were given, rather than silently ignoring it
+       // in favor of the member of (almost) the same name.
+       vector<string> lines = split_lines(request);
+       if (lines.empty()) {
+               fprintf(stderr, "WARNING: Empty HTTP response from input.\n");
+               return false;
+       }
+
+       vector<string> first_line_tokens = split_tokens(lines[0]);
+       if (first_line_tokens.size() < 2) {
+               fprintf(stderr, "WARNING: Malformed response line '%s' from input.\n",
+                       lines[0].c_str());
+               return false;
+       }
+
+       // Named status_code so that it does not shadow the member "response".
+       int status_code = atoi(first_line_tokens[1].c_str());
+       if (status_code != 200) {
+               fprintf(stderr, "WARNING: Non-200 response '%s' from input.\n",
+                       lines[0].c_str());
+               return false;
+       }
+
+       // Collect the "Key: value" lines following the status line.
+       multimap<string, string> parameters;
+       for (size_t i = 1; i < lines.size(); ++i) {
+               size_t split = lines[i].find(":");
+               if (split == string::npos) {
+                       fprintf(stderr, "WARNING: Ignoring malformed HTTP response line '%s'\n",
+                               lines[i].c_str());
+                       continue;
+               }
+
+               string key(lines[i].begin(), lines[i].begin() + split);
+
+               // Skip any spaces after the colon.
+               do {
+                       ++split;
+               } while (split < lines[i].size() && lines[i][split] == ' ');
+
+               string value(lines[i].begin() + split, lines[i].end());
+
+               // Remove “Content-encoding: metacube”.
+               // TODO: Make case-insensitive.
+               if (key == "Content-encoding" && value == "metacube") {
+                       continue;
+               }
+
+               parameters.insert(make_pair(key, value));
        }
 
-       size_t bytes = size * nmemb;
-       input->curl_callback(ptr, bytes);       
-       return bytes;
+       // Change “Server: foo” to “Server: metacube/0.1 (reflecting: foo)”
+       // TODO: Make case-insensitive.
+       // XXX: Use a Via: instead?
+       if (parameters.count("Server") == 0) {
+               parameters.insert(make_pair("Server", "metacube/0.1"));
+       } else {
+               for (multimap<string, string>::iterator it = parameters.begin();
+                    it != parameters.end();
+                    ++it) {
+                       if (it->first != "Server") {
+                               continue;
+                       }
+                       it->second = "metacube/0.1 (reflecting: " + it->second + ")";
+               }
+       }
+
+       // Construct the new HTTP header.
+       http_header = "HTTP/1.0 200 OK\r\n";
+       for (multimap<string, string>::iterator it = parameters.begin();
+            it != parameters.end();
+            ++it) {
+               http_header.append(it->first + ": " + it->second + "\r\n");
+       }
+       http_header.append("\r\n");
+       servers->set_header(stream_id, http_header);
+
+       return true;
 }
+
+// The main loop of the input: a small state machine that runs until
+// should_stop is set.
+//
+//   NOT_CONNECTED    - parse the URL, connect, make the socket
+//                      non-blocking and prepare the GET request.
+//   SENDING_REQUEST  - write the request, possibly over several rounds.
+//   RECEIVING_HEADER - read until the response header is complete, parse
+//                      it, and hand any data that followed it to
+//                      process_data().
+//   RECEIVING_DATA   - read stream data and hand it to process_data().
+//   CLOSING_SOCKET   - close the socket and go back to NOT_CONNECTED.
+//
+// Any error sends us to CLOSING_SOCKET; whenever we end up in
+// NOT_CONNECTED, we wait 0.2 seconds before trying again.
+void Input::do_work()
+{
+       while (!should_stop) {
+               if (state == SENDING_REQUEST || state == RECEIVING_HEADER || state == RECEIVING_DATA) {
+                       // Since we are non-blocking, we need to wait for the right state first.
+                       // Wait up to 50 ms, then check should_stop.
+                       pollfd pfd;
+                       pfd.fd = sock;
+                       pfd.events = (state == SENDING_REQUEST) ? POLLOUT : POLLIN;
+                       pfd.events |= POLLRDHUP;
+
+                       int nfds = poll(&pfd, 1, 50);
+                       if (nfds == 0 || (nfds == -1 && (errno == EAGAIN || errno == EINTR))) {
+                               // Timeout, or a transient failure. In particular, being
+                               // interrupted by a signal (EINTR) is no reason to drop
+                               // the connection; just go around again.
+                               continue;
+                       }
+                       if (nfds == -1) {
+                               perror("poll");
+                               state = CLOSING_SOCKET;
+                       }
+               }
+
+               switch (state) {
+               case NOT_CONNECTED:
+                       request.clear();
+                       request_bytes_sent = 0;
+                       response.clear();
+
+                       if (!parse_url(url, &host, &port, &path)) {
+                               fprintf(stderr, "Failed to parse URL '%s'\n", url.c_str());
+                               break;
+                       }
+
+                       sock = lookup_and_connect(host, port);
+                       if (sock != -1) {
+                               // Yay, successful connect. Try to set it as nonblocking.
+                               int one = 1;
+                               if (ioctl(sock, FIONBIO, &one) == -1) {
+                                       perror("ioctl(FIONBIO)");
+                                       state = CLOSING_SOCKET;
+                               } else {
+                                       state = SENDING_REQUEST;
+                                       request = "GET " + path + " HTTP/1.0\r\nUser-Agent: cubemap\r\n\r\n";
+                                       request_bytes_sent = 0;
+                               }
+                       }
+                       break;
+               case SENDING_REQUEST: {
+                       size_t to_send = request.size() - request_bytes_sent;
+                       int ret;
+
+                       do {
+                               ret = write(sock, request.data() + request_bytes_sent, to_send);
+                       } while (ret == -1 && errno == EINTR);
+
+                       if (ret == -1) {
+                               perror("write");
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       assert(ret >= 0);
+                       request_bytes_sent += ret;
+
+                       // A short write leaves us in SENDING_REQUEST for another round.
+                       if (request_bytes_sent == request.size()) {
+                               state = RECEIVING_HEADER;
+                       }
+                       break;
+               }
+               case RECEIVING_HEADER: {
+                       char buf[4096];
+                       int ret;
+
+                       do {
+                               ret = read(sock, buf, sizeof(buf));
+                       } while (ret == -1 && errno == EINTR);
+
+                       if (ret == -1) {
+                               perror("read");
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       if (ret == 0) {
+                               // This really shouldn't happen...
+                               fprintf(stderr, "Socket unexpectedly closed while reading header\n");
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       RequestParseStatus status = wait_for_double_newline(&response, buf, ret);
+
+                       if (status == RP_OUT_OF_SPACE) {
+                               fprintf(stderr, "WARNING: fd %d sent overlong response!\n", sock);
+                               state = CLOSING_SOCKET;
+                               continue;
+                       } else if (status == RP_NOT_FINISHED_YET) {
+                               continue;
+                       }
        
-void Input::curl_callback(char *ptr, size_t bytes)
+                       // OK, so we're fine, but there might be some of the actual data after the response.
+                       // We'll need to deal with that separately.
+                       string extra_data;
+                       if (status == RP_EXTRA_DATA) {
+                               char *ptr = static_cast<char *>(
+                                       memmem(response.data(), response.size(), "\r\n\r\n", 4));
+                               assert(ptr != NULL);
+                               // The data starts after the four-byte "\r\n\r\n" delimiter;
+                               // the delimiter itself belongs to neither part.
+                               extra_data = string(ptr + 4, &response[0] + response.size());
+                               response.resize(ptr - response.data());
+                       }
+
+                       if (!parse_response(response)) {
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       if (!extra_data.empty()) {
+                               process_data(&extra_data[0], extra_data.size());
+                       }
+
+                       state = RECEIVING_DATA;
+                       break;
+               }
+               case RECEIVING_DATA: {
+                       char buf[4096];
+                       int ret;
+
+                       do {
+                               ret = read(sock, buf, sizeof(buf));
+                       } while (ret == -1 && errno == EINTR);
+
+                       if (ret == -1) {
+                               perror("read");
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       if (ret == 0) {
+                               // This really shouldn't happen...
+                               fprintf(stderr, "Socket unexpectedly closed while reading data\n");
+                               state = CLOSING_SOCKET;
+                               continue;
+                       }
+
+                       process_data(buf, ret);
+                       break;
+               }
+               case CLOSING_SOCKET: {
+                       int err;
+                       do {
+                               err = close(sock);
+                       } while (err == -1 && errno == EINTR);
+
+                       if (err == -1) {
+                               perror("close");
+                       }
+
+                       state = NOT_CONNECTED;
+                       break;
+               }
+               default:
+                       assert(false);
+               }
+
+               // If we are still in NOT_CONNECTED, either something went wrong,
+               // or the connection just got closed.
+               // The earlier steps have already given the error message, if any.
+               if (state == NOT_CONNECTED && !should_stop) {
+                       fprintf(stderr, "Waiting 0.2 second and restarting...\n");
+                       usleep(200000);
+               }
+       }
+}
+
+void Input::process_data(char *ptr, size_t bytes)
 {
        pending_data.insert(pending_data.end(), ptr, ptr + bytes);
 
@@ -130,29 +476,22 @@ void Input::curl_callback(char *ptr, size_t bytes)
                        return;
                }
 
-               process_block(pending_data.data() + sizeof(metacube_block_header), size, flags);
+               // Send this block on to the data.
+               char *inner_data = pending_data.data() + sizeof(metacube_block_header);
+               if (flags & METACUBE_FLAGS_HEADER) {
+                       string header(inner_data, inner_data + size);
+                       servers->set_header(stream_id, http_header + header);
+               } else { 
+                       servers->add_data(stream_id, inner_data, size);
+               }
 
-               // Consume this block. This isn't the most efficient way of dealing with things
+               // Consume the block. This isn't the most efficient way of dealing with things
                // should we have many blocks, but these routines don't need to be too efficient
                // anyway.
                pending_data.erase(pending_data.begin(), pending_data.begin() + sizeof(metacube_block_header) + size);
                has_metacube_header = false;
        }
 }
-               
-void Input::process_block(const char *data, uint32_t size, uint32_t flags)
-{      
-       if (flags & METACUBE_FLAGS_HEADER) {
-               string header(data, data + size);
-               for (int i = 0; i < NUM_SERVERS; ++i) {
-                       servers[i].set_header(stream_id, header);
-               }
-       } else { 
-               for (int i = 0; i < NUM_SERVERS; ++i) {
-                       servers[i].add_data(stream_id, data, size);
-               }
-       }
-}
 
 void Input::drop_pending_data(size_t num_bytes)
 {