Revert "Rewrite the entire internal signal handling/wakeup."
[cubemap] / main.cpp
1 #include <assert.h>
2 #include <errno.h>
3 #include <getopt.h>
4 #include <limits.h>
5 #include <signal.h>
6 #include <stddef.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/time.h>
11 #include <sys/wait.h>
12 #include <unistd.h>
13 #include <map>
14 #include <set>
15 #include <string>
16 #include <utility>
17 #include <vector>
18
19 #include "acceptor.h"
20 #include "accesslog.h"
21 #include "config.h"
22 #include "input.h"
23 #include "log.h"
24 #include "markpool.h"
25 #include "serverpool.h"
26 #include "state.pb.h"
27 #include "stats.h"
28 #include "stream.h"
29 #include "util.h"
30 #include "version.h"
31
32 using namespace std;
33
// Access log writer. Created and started in main(), and stopped and deleted
// there again during shutdown.
AccessLogThread *access_log = NULL;
// The server pool. Created in main() and deleted there after it has been stopped.
ServerPool *servers = NULL;
// One MarkPool per mark pool entry in the configuration. Filled in by
// create_streams(); the pools are deleted and the vector cleared by main().
vector<MarkPool *> mark_pools;
// Set by the signal handler on SIGHUP and SIGINT; main() polls this flag
// to find out when it should start shutting down.
volatile bool hupped = false;
// Set by the signal handler on SIGINT only; makes main() exit after
// shutting down, instead of serializing state and re-execing.
volatile bool stopped = false;
38 volatile bool stopped = false;
39
// An input, together with a count of how many configured streams use it.
struct InputWithRefcount {
        Input *input;
        // Incremented by create_streams() once for every stream that has this
        // input as its source. main() closes inputs whose count is still zero.
        int refcount;
};
44
45 void hup(int signum)
46 {
47         hupped = true;
48         if (signum == SIGINT) {
49                 stopped = true;
50         }
51 }
52
53 CubemapStateProto collect_state(const timeval &serialize_start,
54                                 const vector<Acceptor *> acceptors,
55                                 const multimap<string, InputWithRefcount> inputs,
56                                 ServerPool *servers)
57 {
58         CubemapStateProto state = servers->serialize();  // Fills streams() and clients().
59         state.set_serialize_start_sec(serialize_start.tv_sec);
60         state.set_serialize_start_usec(serialize_start.tv_usec);
61         
62         for (size_t i = 0; i < acceptors.size(); ++i) {
63                 state.add_acceptors()->MergeFrom(acceptors[i]->serialize());
64         }
65
66         for (multimap<string, InputWithRefcount>::const_iterator input_it = inputs.begin();
67              input_it != inputs.end();
68              ++input_it) {
69                 state.add_inputs()->MergeFrom(input_it->second.input->serialize());
70         }
71
72         return state;
73 }
74
75 // Find all port statements in the configuration file, and create acceptors for htem.
76 vector<Acceptor *> create_acceptors(
77         const Config &config,
78         map<int, Acceptor *> *deserialized_acceptors)
79 {
80         vector<Acceptor *> acceptors;
81         for (unsigned i = 0; i < config.acceptors.size(); ++i) {
82                 const AcceptorConfig &acceptor_config = config.acceptors[i];
83                 Acceptor *acceptor = NULL;
84                 map<int, Acceptor *>::iterator deserialized_acceptor_it =
85                         deserialized_acceptors->find(acceptor_config.port);
86                 if (deserialized_acceptor_it != deserialized_acceptors->end()) {
87                         acceptor = deserialized_acceptor_it->second;
88                         deserialized_acceptors->erase(deserialized_acceptor_it);
89                 } else {
90                         int server_sock = create_server_socket(acceptor_config.port, TCP_SOCKET);
91                         acceptor = new Acceptor(server_sock, acceptor_config.port);
92                 }
93                 acceptor->run();
94                 acceptors.push_back(acceptor);
95         }
96
97         // Close all acceptors that are no longer in the configuration file.
98         for (map<int, Acceptor *>::iterator acceptor_it = deserialized_acceptors->begin();
99              acceptor_it != deserialized_acceptors->end();
100              ++acceptor_it) {
101                 acceptor_it->second->close_socket();
102                 delete acceptor_it->second;
103         }
104
105         return acceptors;
106 }
107
108 // Find all streams in the configuration file, and create inputs for them.
109 void create_config_inputs(const Config &config, multimap<string, InputWithRefcount> *inputs)
110 {
111         for (unsigned i = 0; i < config.streams.size(); ++i) {
112                 const StreamConfig &stream_config = config.streams[i];
113                 if (stream_config.src.empty()) {
114                         continue;
115                 }
116
117                 string src = stream_config.src;
118                 if (inputs->count(src) != 0) {
119                         continue;
120                 }
121
122                 InputWithRefcount iwr;
123                 iwr.input = create_input(src);
124                 if (iwr.input == NULL) {
125                         log(ERROR, "did not understand URL '%s', clients will not get any data.",
126                                 src.c_str());
127                         continue;
128                 }
129                 iwr.refcount = 0;
130                 inputs->insert(make_pair(src, iwr));
131         }
132 }
133
134 void create_streams(const Config &config,
135                     const set<string> &deserialized_stream_ids,
136                     multimap<string, InputWithRefcount> *inputs)
137 {
138         for (unsigned i = 0; i < config.mark_pools.size(); ++i) {
139                 const MarkPoolConfig &mp_config = config.mark_pools[i];
140                 mark_pools.push_back(new MarkPool(mp_config.from, mp_config.to));
141         }
142
143         set<string> expecting_stream_ids = deserialized_stream_ids;
144         for (unsigned i = 0; i < config.streams.size(); ++i) {
145                 const StreamConfig &stream_config = config.streams[i];
146                 if (deserialized_stream_ids.count(stream_config.stream_id) == 0) {
147                         servers->add_stream(stream_config.stream_id,
148                                             stream_config.backlog_size,
149                                             Stream::Encoding(stream_config.encoding));
150                 } else {
151                         servers->set_backlog_size(stream_config.stream_id, stream_config.backlog_size);
152                         servers->set_encoding(stream_config.stream_id,
153                                               Stream::Encoding(stream_config.encoding));
154                 }
155                 expecting_stream_ids.erase(stream_config.stream_id);
156
157                 if (stream_config.mark_pool != -1) {
158                         servers->set_mark_pool(stream_config.stream_id,
159                                                mark_pools[stream_config.mark_pool]);
160                 }
161
162                 string src = stream_config.src;
163                 if (!src.empty()) {
164                         multimap<string, InputWithRefcount>::iterator input_it = inputs->find(src);
165                         assert(input_it != inputs->end());
166                         input_it->second.input->add_destination(stream_config.stream_id);
167                         ++input_it->second.refcount;
168                 }
169         }
170
171         // Warn about any servers we've lost.
172         // TODO: Make an option (delete=yes?) to actually shut down streams.
173         for (set<string>::const_iterator stream_it = expecting_stream_ids.begin();
174              stream_it != expecting_stream_ids.end();
175              ++stream_it) {
176                 string stream_id = *stream_it;
177                 log(WARNING, "stream '%s' disappeared from the configuration file. "
178                              "It will not be deleted, but clients will not get any new inputs.",
179                              stream_id.c_str());
180         }
181 }
182         
183 void open_logs(const vector<LogConfig> &log_destinations)
184 {
185         for (size_t i = 0; i < log_destinations.size(); ++i) {
186                 if (log_destinations[i].type == LogConfig::LOG_TYPE_FILE) {
187                         add_log_destination_file(log_destinations[i].filename);
188                 } else if (log_destinations[i].type == LogConfig::LOG_TYPE_CONSOLE) {
189                         add_log_destination_console();
190                 } else if (log_destinations[i].type == LogConfig::LOG_TYPE_SYSLOG) {
191                         add_log_destination_syslog();
192                 } else {
193                         assert(false);
194                 }
195         }
196         start_logging();
197 }
198         
199 bool dry_run_config(const std::string &argv0, const std::string &config_filename)
200 {
201         char *argv0_copy = strdup(argv0.c_str());
202         char *config_filename_copy = strdup(config_filename.c_str());
203
204         pid_t pid = fork();
205         switch (pid) {
206         case -1:
207                 log_perror("fork()");
208                 free(argv0_copy);
209                 free(config_filename_copy);
210                 return false;
211         case 0:
212                 // Child.
213                 execlp(argv0_copy, argv0_copy, "--test-config", config_filename_copy, NULL);
214                 log_perror(argv0_copy);
215                 _exit(1);
216         default:
217                 // Parent.
218                 break;
219         }
220                 
221         free(argv0_copy);
222         free(config_filename_copy);
223
224         int status;
225         pid_t err;
226         do {
227                 err = waitpid(pid, &status, 0);
228         } while (err == -1 && errno == EINTR);
229
230         if (err == -1) {
231                 log_perror("waitpid()");
232                 return false;
233         }       
234
235         return (WIFEXITED(status) && WEXITSTATUS(status) == 0);
236 }
237
238 int main(int argc, char **argv)
239 {
240         signal(SIGHUP, hup);
241         signal(SIGINT, hup);
242         signal(SIGPIPE, SIG_IGN);
243         
244         // Parse options.
245         int state_fd = -1;
246         bool test_config = false;
247         for ( ;; ) {
248                 static const option long_options[] = {
249                         { "state", required_argument, 0, 's' },
250                         { "test-config", no_argument, 0, 't' },
251                         { 0, 0, 0, 0 }
252                 };
253                 int option_index = 0;
254                 int c = getopt_long(argc, argv, "s:t", long_options, &option_index);
255      
256                 if (c == -1) {
257                         break;
258                 }
259                 switch (c) {
260                 case 's':
261                         state_fd = atoi(optarg);
262                         break;
263                 case 't':
264                         test_config = true;
265                         break;
266                 default:
267                         fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
268                         exit(1);
269                 }
270         }
271
272         string config_filename = "cubemap.config";
273         if (optind < argc) {
274                 config_filename = argv[optind++];
275         }
276
277         // Canonicalize argv[0] and config_filename.
278         char argv0_canon[PATH_MAX];
279         char config_filename_canon[PATH_MAX];
280
281         if (realpath(argv[0], argv0_canon) == NULL) {
282                 log_perror(argv[0]);
283                 exit(1);
284         }
285         if (realpath(config_filename.c_str(), config_filename_canon) == NULL) {
286                 log_perror(config_filename.c_str());
287                 exit(1);
288         }
289
290         // Now parse the configuration file.
291         Config config;
292         if (!parse_config(config_filename_canon, &config)) {
293                 exit(1);
294         }
295         if (test_config) {
296                 exit(0);
297         }
298         
299         // Ideally we'd like to daemonize only when we've started up all threads etc.,
300         // but daemon() forks, which is not good in multithreaded software, so we'll
301         // have to do it here.
302         if (config.daemonize) {
303                 if (daemon(0, 0) == -1) {
304                         log_perror("daemon");
305                         exit(1);
306                 }
307         }
308
309 start:
310         // Open logs as soon as possible.
311         open_logs(config.log_destinations);
312
313         log(INFO, "Cubemap " SERVER_VERSION " starting.");
314         if (config.access_log_file.empty()) {
315                 // Create a dummy logger.
316                 access_log = new AccessLogThread();
317         } else {
318                 access_log = new AccessLogThread(config.access_log_file);
319         }
320         access_log->run();
321
322         servers = new ServerPool(config.num_servers);
323
324         CubemapStateProto loaded_state;
325         struct timeval serialize_start;
326         set<string> deserialized_stream_ids;
327         map<int, Acceptor *> deserialized_acceptors;
328         multimap<string, InputWithRefcount> inputs;  // multimap due to older versions without deduplication.
329         if (state_fd != -1) {
330                 log(INFO, "Deserializing state from previous process...");
331                 string serialized;
332                 if (!read_tempfile(state_fd, &serialized)) {
333                         exit(1);
334                 }
335                 if (!loaded_state.ParseFromString(serialized)) {
336                         log(ERROR, "Failed deserialization of state.");
337                         exit(1);
338                 }
339
340                 serialize_start.tv_sec = loaded_state.serialize_start_sec();
341                 serialize_start.tv_usec = loaded_state.serialize_start_usec();
342
343                 // Deserialize the streams.
344                 for (int i = 0; i < loaded_state.streams_size(); ++i) {
345                         const StreamProto &stream = loaded_state.streams(i);
346
347                         vector<int> data_fds;
348                         for (int j = 0; j < stream.data_fds_size(); ++j) {
349                                 data_fds.push_back(stream.data_fds(j));
350                         }
351
352                         // Older versions stored the data once in the protobuf instead of
353                         // sending around file descriptors.
354                         if (data_fds.empty() && stream.has_data()) {
355                                 data_fds.push_back(make_tempfile(stream.data()));
356                         }
357
358                         servers->add_stream_from_serialized(stream, data_fds);
359                         deserialized_stream_ids.insert(stream.stream_id());
360                 }
361
362                 // Deserialize the inputs. Note that we don't actually add them to any stream yet.
363                 for (int i = 0; i < loaded_state.inputs_size(); ++i) {
364                         InputWithRefcount iwr;
365                         iwr.input = create_input(loaded_state.inputs(i));
366                         iwr.refcount = 0;
367                         inputs.insert(make_pair(loaded_state.inputs(i).url(), iwr));
368                 } 
369
370                 // Deserialize the acceptors.
371                 for (int i = 0; i < loaded_state.acceptors_size(); ++i) {
372                         deserialized_acceptors.insert(make_pair(
373                                 loaded_state.acceptors(i).port(),
374                                 new Acceptor(loaded_state.acceptors(i))));
375                 }
376
377                 log(INFO, "Deserialization done.");
378         }
379
380         // Add any new inputs coming from the config.
381         create_config_inputs(config, &inputs);
382         
383         // Find all streams in the configuration file, create them, and connect to the inputs.
384         create_streams(config, deserialized_stream_ids, &inputs);
385         vector<Acceptor *> acceptors = create_acceptors(config, &deserialized_acceptors);
386         
387         // Put back the existing clients. It doesn't matter which server we
388         // allocate them to, so just do round-robin. However, we need to add
389         // them after the mark pools have been set up.
390         for (int i = 0; i < loaded_state.clients_size(); ++i) {
391                 servers->add_client_from_serialized(loaded_state.clients(i));
392         }
393         
394         servers->run();
395
396         // Now delete all inputs that are longer in use, and start the others.
397         for (multimap<string, InputWithRefcount>::iterator input_it = inputs.begin();
398              input_it != inputs.end(); ) {
399                 if (input_it->second.refcount == 0) {
400                         log(WARNING, "Input '%s' no longer in use, closing.",
401                             input_it->first.c_str());
402                         input_it->second.input->close_socket();
403                         delete input_it->second.input;
404                         inputs.erase(input_it++);
405                 } else {
406                         input_it->second.input->run();
407                         ++input_it;
408                 }
409         }
410
411         // Start writing statistics.
412         StatsThread *stats_thread = NULL;
413         if (!config.stats_file.empty()) {
414                 stats_thread = new StatsThread(config.stats_file, config.stats_interval);
415                 stats_thread->run();
416         }
417
418         struct timeval server_start;
419         gettimeofday(&server_start, NULL);
420         if (state_fd != -1) {
421                 // Measure time from we started deserializing (below) to now, when basically everything
422                 // is up and running. This is, in other words, a conservative estimate of how long our
423                 // “glitch” period was, not counting of course reconnects if the configuration changed.
424                 double glitch_time = server_start.tv_sec - serialize_start.tv_sec +
425                         1e-6 * (server_start.tv_usec - serialize_start.tv_usec);
426                 log(INFO, "Re-exec happened in approx. %.0f ms.", glitch_time * 1000.0);
427         }
428
429         while (!hupped) {
430                 usleep(100000);
431         }
432
433         // OK, we've been HUPed. Time to shut down everything, serialize, and re-exec.
434         gettimeofday(&serialize_start, NULL);
435
436         if (stats_thread != NULL) {
437                 stats_thread->stop();
438                 delete stats_thread;
439         }
440         for (size_t i = 0; i < acceptors.size(); ++i) {
441                 acceptors[i]->stop();
442         }
443         for (multimap<string, InputWithRefcount>::iterator input_it = inputs.begin();
444              input_it != inputs.end();
445              ++input_it) {
446                 input_it->second.input->stop();
447         }
448         servers->stop();
449
450         CubemapStateProto state;
451         if (stopped) {
452                 log(INFO, "Shutting down.");
453         } else {
454                 log(INFO, "Serializing state and re-execing...");
455                 state = collect_state(
456                         serialize_start, acceptors, inputs, servers);
457                 string serialized;
458                 state.SerializeToString(&serialized);
459                 state_fd = make_tempfile(serialized);
460                 if (state_fd == -1) {
461                         exit(1);
462                 }
463         }
464         delete servers;
465
466         for (unsigned i = 0; i < mark_pools.size(); ++i) {
467                 delete mark_pools[i];
468         }
469         mark_pools.clear();
470
471         access_log->stop();
472         delete access_log;
473         shut_down_logging();
474
475         if (stopped) {
476                 exit(0);
477         }
478
479         // OK, so the signal was SIGHUP. Check that the new config is okay, then exec the new binary.
480         if (!dry_run_config(argv0_canon, config_filename_canon)) {
481                 open_logs(config.log_destinations);
482                 log(ERROR, "%s --test-config failed. Restarting old version instead of new.", argv[0]);
483                 hupped = false;
484                 shut_down_logging();
485                 goto start;
486         }
487          
488         char buf[16];
489         sprintf(buf, "%d", state_fd);
490
491         for ( ;; ) {
492                 execlp(argv0_canon, argv0_canon, config_filename_canon, "--state", buf, NULL);
493                 open_logs(config.log_destinations);
494                 log_perror("execlp");
495                 log(ERROR, "re-exec of %s failed. Waiting 0.2 seconds and trying again...", argv0_canon);
496                 shut_down_logging();
497                 usleep(200000);
498         }
499 }