Make serve-analysis.js capable of handling e.g. /analysis2.pl.
[remoteglot] / www / serve-analysis.js
1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
3
4 // Modules.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var delta = require('./js/json_delta.js');
12
// Constants.

// How many previous versions we keep around to generate deltas against.
var HISTORY_TO_KEEP = 5;

// If set, sent along to clients in the X-RGMV header.
var MINIMUM_VERSION = null;

// Filename to serve. Can be overridden by the first command-line argument.
var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
if (process.argv.length >= 3) {
        json_filename = process.argv[2];
}

// Expected destination filename, i.e., the URL path we answer on.
// Can be overridden by the second command-line argument.
var serve_url = '/analysis.pl';
if (process.argv.length >= 4) {
        serve_url = process.argv[3];
}

// TCP port to listen on. Can be overridden by the third command-line argument.
var port = 5000;
if (process.argv.length >= 5) {
        // Always parse as decimal; without an explicit radix, prefixes
        // such as "0x" would silently select a different base.
        port = parseInt(process.argv[4], 10);
}

// If set to 1, we are already processing a JSON update and should not
// start a new one. If set to 2, we are _also_ having one in the queue.
var json_lock = 0;

// The current version of the file to hand out; an object holding the
// parsed, plain-text and gzipped contents plus the last modified time.
// Undefined until the first read has completed.
var json = undefined;

// The last HISTORY_TO_KEEP versions, and diffs from them to the latest
// version (keyed by the last modified time of the version diffed from).
var historic_json = [];
var diff_json = {};

// The list of clients that are waiting for new data to show up.
// Uniquely keyed by request_id so that we can take them out of
// the queue if they close the socket.
var sleeping_clients = {};
var request_id = 0;

// List of when clients were last seen, keyed by their unique ID.
// Used to show a viewer count to the user.
var last_seen_clients = {};

// The timer used to touch the file every 30 seconds if nobody
// else does it for us. This makes sure we don't have clients
// hanging indefinitely (which might have them return errors).
var touch_timer = undefined;

// If we are behind Varnish, we can't count the number of clients
// ourselves, so some external log-tailing daemon needs to tell us.
var viewer_count_override = undefined;
64
// Install new_json_contents (the raw text of the file) with modification
// time mtime as the current version: the previous version is moved into
// the history, deltas from every historic version to the new one are
// generated, everything is gzipped, and then the lock is released and
// any sleeping clients are woken up.
//
// If the contents are not valid JSON (e.g. we caught the file half-written),
// the update is dropped: we keep serving the old version and release the
// lock so that the next change to the file is picked up normally.
var replace_json = function(new_json_contents, mtime) {
        // Parse before touching any state, so that a bad file leaves
        // both the current version and the history untouched.
        var parsed;
        try {
                parsed = JSON.parse(new_json_contents);
        } catch (e) {
                console.log("Ignoring update with invalid JSON: " + e.message);
                json_lock = 0;
                return;
        }

        // Generate the list of diffs from the last five versions.
        if (json !== undefined) {
                // If two versions have the same mtime, clients could have either.
                // Note the fact, so that we never insert it.
                if (json.last_modified == mtime) {
                        json.invalid_base = true;
                }
                if (!json.invalid_base) {
                        historic_json.push(json);
                        if (historic_json.length > HISTORY_TO_KEEP) {
                                historic_json.shift();
                        }
                }
        }

        var new_json = {
                parsed: parsed,
                plain: new_json_contents,
                last_modified: mtime
        };
        // Hand over a copy of the history; the diff generator consumes its list.
        create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
                // gzip the new version (non-delta), and put it into place.
                zlib.gzip(new_json_contents, function(err, buffer) {
                        if (err) throw err;

                        new_json.gzip = buffer;
                        json = new_json;
                        diff_json = new_diff_json;
                        json_lock = 0;

                        // Finally, wake up any sleeping clients.
                        possibly_wakeup_clients();
                });
        });
}
101
// Build the table of deltas from each historic version in history_left
// to new_json, one version per asynchronous step. Every finished delta is
// stored in new_diff_json under the historic version's last_modified, in
// parsed, plain-text and gzipped form, tagged with new_json's last_modified.
// history_left is consumed along the way; once it is empty, cb is called
// with new_diff_json.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
        if (history_left.length > 0) {
                var base = history_left.shift();
                var patch = delta.JSON_delta.diff(base.parsed, new_json.parsed);
                var patch_text = JSON.stringify(patch);
                zlib.gzip(patch_text, function(err, compressed) {
                        if (err) throw err;
                        var entry = {
                                parsed: patch,
                                plain: patch_text,
                                gzip: compressed,
                                last_modified: new_json.last_modified,
                        };
                        new_diff_json[base.last_modified] = entry;
                        // Continue with whatever is left of the history.
                        create_json_historic_diff(new_json, history_left, new_diff_json, cb);
                });
                return;
        }

        // Nothing (more) to diff against; hand back the finished table.
        cb(new_diff_json);
}
122
// Callback for fs.watch() (also called once by hand at startup): if
// filename is the file we serve, read it in and hand the contents and
// mtime over to replace_json(). Events for other files are ignored.
//
// json_lock makes sure only one update is processed at a time; an event
// that arrives while an update is in flight is retried every 100 ms until
// the lock is free, and further events in the meantime are dropped
// (the queued retry will pick up the newest contents anyway).
//
// Every call that starts a read also re-arms the 30-second touch timer.
var reread_file = function(event, filename) {
        if (filename != path.basename(json_filename)) {
                return;
        }
        if (json_lock >= 2) {
                return;
        }
        if (json_lock == 1) {
                // Already processing; wait a bit.
                json_lock = 2;
                setTimeout(function() {
                        // Only step back down to "processing" if the update is
                        // still in flight. If it finished in the meantime
                        // (lock released to 0), forcing the lock to 1 here would make
                        // the retry below queue itself again, forever, without
                        // ever reading the file.
                        if (json_lock == 2) {
                                json_lock = 1;
                        }
                        reread_file(event, filename);
                }, 100);
                return;
        }
        json_lock = 1;

        console.log("Rereading " + json_filename);
        // We only ever read through this descriptor, so plain 'r' is enough.
        fs.open(json_filename, 'r', function(err, fd) {
                if (err) throw err;
                fs.fstat(fd, function(err, st) {
                        if (err) throw err;
                        // Size the buffer after the file, so that large files are
                        // not silently truncated. Never allocate a zero-length
                        // buffer; an empty file simply yields zero bytes read.
                        var buffer = Buffer.alloc(Math.max(st.size, 1));
                        fs.read(fd, buffer, 0, buffer.length, 0, function(err, bytesRead, buffer) {
                                if (err) throw err;
                                fs.close(fd, function() {
                                        var new_json_contents = buffer.toString('utf8', 0, bytesRead);
                                        replace_json(new_json_contents, st.mtime.getTime());
                                });
                        });
                });
        });

        if (touch_timer !== undefined) {
                clearTimeout(touch_timer);
        }
        touch_timer = setTimeout(function() {
                console.log("Touching analysis.json due to no other activity");
                var now = Date.now() / 1000;
                // fs.utimes() requires a callback; a failed touch is logged
                // but is not worth taking the server down for.
                fs.utimes(json_filename, now, now, function(err) {
                        if (err) {
                                console.log("Could not touch " + json_filename + ": " + err.message);
                        }
                });
        }, 30000);
}
// Answer every client currently on the wait list with the current data,
// and then empty the list. The viewer count is computed once, up front,
// and the same number is sent to everyone.
var possibly_wakeup_clients = function() {
        var viewers = count_viewers();
        Object.keys(sleeping_clients).forEach(function(id) {
                var client = sleeping_clients[id];
                mark_recently_seen(client.unique);
                send_json(client.response, client.ims, client.accept_gzip, viewers);
        });
        sleeping_clients = {};
}
// Finish the given response with a 404 status and a short
// plain-text apology.
var send_404 = function(response) {
        var headers = { 'Content-Type': 'text/plain' };
        response.writeHead(404, headers);
        response.write('Something went wrong. Sorry.');
        response.end();
}
// Handle a request to override the viewer count: store the 'num' query
// parameter from u as viewer_count_override and answer 'OK.'.
//
// The override is accepted only when the connection's remote end is a
// loopback address and the request carries no X-Forwarded-For header
// (i.e., it was not relayed to us by a proxy on behalf of someone else).
// Everything else gets a 404.
var handle_viewer_override = function(request, u, response) {
        // Only accept requests from localhost. Look at the remote end of the
        // socket (the local end is our own address), and also accept the
        // IPv4-mapped form that a dual-stack listening socket reports for
        // IPv4 loopback connections.
        var peer = request.socket.remoteAddress;
        var from_localhost = (peer == '127.0.0.1' ||
                              peer == '::1' ||
                              peer == '::ffff:127.0.0.1');
        if (!from_localhost || request.headers['x-forwarded-for']) {
                console.log("Refusing viewer override from " + peer);
                send_404(response);
        } else {
                viewer_count_override = (u.query)['num'];
                response.writeHead(200, {
                        'Content-Type': 'text/plain',
                });
                response.write('OK.');
                response.end();
        }
}
// Send the current data to a client and finish the response.
//
//   response:    the HTTP response to write to.
//   ims:         the version (last modified time) the client says it
//                already has, if any. If we have a delta from exactly that
//                version, the delta is sent; otherwise the full JSON.
//   accept_gzip: whether to send the pre-gzipped variant.
//   num_viewers: the viewer count to report in the X-RGNV header.
var send_json = function(response, ims, accept_gzip, num_viewers) {
        // ims comes straight from the query string, so only ever look at
        // deltas we stored ourselves; a plain diff_json[ims] would also find
        // inherited members such as "constructor" or "__proto__".
        var this_json = json;
        if (Object.prototype.hasOwnProperty.call(diff_json, ims)) {
                this_json = diff_json[ims];
        }

        var headers = {
                'Content-Type': 'text/json',
                'X-RGLM': this_json.last_modified,
                'X-RGNV': num_viewers,
                'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
                'Vary': 'Accept-Encoding',
        };

        if (MINIMUM_VERSION) {
                headers['X-RGMV'] = MINIMUM_VERSION;
        }

        if (accept_gzip) {
                headers['Content-Length'] = this_json.gzip.length;
                headers['Content-Encoding'] = 'gzip';
                response.writeHead(200, headers);
                response.write(this_json.gzip);
        } else {
                // Content-Length counts bytes on the wire, not characters;
                // the two differ as soon as the JSON contains non-ASCII text.
                headers['Content-Length'] = Buffer.byteLength(this_json.plain, 'utf8');
                response.writeHead(200, headers);
                response.write(this_json.plain);
        }
        response.end();
}
// Note down the current time as the moment the client with the given
// unique ID was last seen. Clients without an ID are not tracked.
var mark_recently_seen = function(unique) {
        if (!unique) {
                return;
        }
        last_seen_clients[unique] = Date.now();
}
// Return the number of viewers to report to clients. If an override has
// been set, that is returned as-is. Otherwise, we count the clients seen
// within the last five seconds (forgetting about everyone older than that
// as we go), plus any identified client that is currently sleeping but
// is not among the recently seen.
var count_viewers = function() {
        if (viewer_count_override !== undefined) {
                return viewer_count_override;
        }

        var cutoff = Date.now() - 5000;

        // Keep only those seen after the cutoff, counting them as we go.
        var still_fresh = {};
        var total = 0;
        Object.keys(last_seen_clients).forEach(function(id) {
                var seen_at = last_seen_clients[id];
                if (seen_at > cutoff) {
                        still_fresh[id] = seen_at;
                        ++total;
                }
        });

        // Sleeping clients are still viewers, even if we would otherwise
        // have considered them timed out.
        Object.keys(sleeping_clients).forEach(function(key) {
                var who = sleeping_clients[key].unique;
                if (who && !(who in still_fresh)) {
                        ++total;
                }
        });

        last_seen_clients = still_fresh;
        return total;
}
256
// Watch the directory the file lives in for changes (reread_file itself
// ignores events for any other file in there), and then read the file
// once by hand so that we have a copy to serve from the start.
var watched_directory = path.dirname(json_filename);
fs.watch(watched_directory, reread_file);
reread_file(null, path.basename(json_filename));
261
// The HTTP server. Every request is logged; /override-num-viewers goes to
// the override handler, anything else that is not serve_url gets a 404.
// Requests for serve_url are either answered right away (if we have
// something newer than what the client has) or parked on the wait list.
var server = http.createServer();
server.on('request', function(request, response) {
        var u = url.parse(request.url, true);
        var query = u.query;
        var ims = query['ims'];
        var unique = query['unique'];

        console.log(((new Date).getTime()*1e-3).toFixed(3) + " " + request.url);

        switch (u.pathname) {
        case '/override-num-viewers':
                handle_viewer_override(request, u, response);
                return;
        case serve_url:
                break;
        default:
                // This is not the request you are looking for.
                send_404(response);
                return;
        }

        mark_recently_seen(unique);

        var accept_encoding = request.headers['accept-encoding'];
        var accept_gzip = (accept_encoding !== undefined) && /\bgzip\b/.test(accept_encoding);

        // No point in making the client wait if we hold a version and the
        // client either has nothing at all or something older than ours.
        var have_news = (json !== undefined) && (!ims || json.last_modified > ims);
        if (have_news) {
                send_json(response, ims, accept_gzip, count_viewers());
                return;
        }

        // Nothing new yet; park the client until the next update. The client
        // is also hung on the socket, so that it can be found again from
        // there when the connection goes away.
        var client = {
                response: response,
                request_id: request_id,
                accept_gzip: accept_gzip,
                unique: unique,
                ims: ims
        };
        sleeping_clients[client.request_id] = client;
        request_id++;

        request.socket.client = client;
});
// When a connection goes away, take the client hanging on it (if any)
// off the wait list, noting that we saw it just now.
server.on('connection', function(socket) {
        socket.on('close', function() {
                var client = socket.client;
                if (!client) {
                        return;
                }
                mark_recently_seen(client.unique);
                delete sleeping_clients[client.request_id];
        });
});

// Everything is set up; start accepting connections.
server.listen(port);