Refactor print_pv a bit.
[remoteglot] / www / serve-analysis.js
1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
3
4 // Modules.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var readline = require('readline');
12 var child_process = require('child_process');
13 var delta = require('./js/json_delta.js');
14
// Constants.
var HISTORY_TO_KEEP = 5;            // Number of older versions we keep diffs against.
var MINIMUM_VERSION = null;         // If set, sent to clients in the X-RGMV header.
var COUNT_FROM_VARNISH_LOG = true;  // Take the viewer count from varnishncsa output.

// Command line: [json_filename [serve_url [port]]]

// Filename to serve.
var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
if (process.argv.length >= 3) {
        json_filename = process.argv[2];
}

// Expected destination filename.
var serve_url = '/analysis.pl';
if (process.argv.length >= 4) {
        serve_url = process.argv[3];
}

// TCP port to listen on.
var port = 5000;
if (process.argv.length >= 5) {
        // Always parse as decimal; without an explicit radix, an argument
        // such as "0x1388" would silently be read as hexadecimal.
        port = parseInt(process.argv[4], 10);
}
37
// Re-read lock. 0 means idle. 1 means a JSON update is already being
// processed, so a new one should not be started. 2 means that, in
// addition, one more update is waiting in the queue.
var json_lock = 0;

// The version of the file currently being handed out, together with its
// last modified time. Stays undefined until the first version is in place.
var json;

// The most recent older versions (at most HISTORY_TO_KEEP of them), and
// the diffs from each of them to the latest version, keyed by the older
// version's timestamp.
var historic_json = [];
var diff_json = {};

// Clients that are waiting for new data to show up. Keyed by a unique
// request_id, so that a client can be taken out of the queue again
// if it closes the socket.
var sleeping_clients = {};
var request_id = 0;

// When each client was last seen, keyed by the client's unique ID.
// Used to show a viewer count to the user.
var last_seen_clients = {};

// Timer that touches the file after 30 seconds if nobody else has
// changed it for us. This makes sure clients are not left hanging
// indefinitely (which might have them return errors).
var touch_timer;

// If we are behind Varnish, we can't count the number of clients
// ourselves, so the count is obtained by parsing varnishncsa output
// and stored here.
var viewer_count_override;
67
// Installs new_json_contents (the raw text of the JSON file) with
// modification time mtime as the current version: moves the previous
// version into the history, builds diffs from every historic version to
// the new one, gzips the new version, releases json_lock and finally
// wakes up any sleeping clients.
//
// If new_json_contents is not valid JSON, the current version is left
// untouched, the problem is logged and json_lock is released.
var replace_json = function(new_json_contents, mtime) {
        // Parse before touching any state. Contents we cannot parse (for
        // instance a file caught in the middle of being rewritten) must not
        // throw with the lock held, and must not disturb the history.
        var parsed;
        try {
                parsed = JSON.parse(new_json_contents);
        } catch (err) {
                console.log("Could not parse " + json_filename + ": " + err.message);
                json_lock = 0;
                return;
        }

        // Generate the list of diffs from the last five versions.
        if (json !== undefined) {
                // If two versions have the same mtime, clients could have either.
                // Note the fact, so that we never insert it.
                if (json.last_modified == mtime) {
                        json.invalid_base = true;
                }
                if (!json.invalid_base) {
                        historic_json.push(json);
                        if (historic_json.length > HISTORY_TO_KEEP) {
                                historic_json.shift();
                        }
                }
        }

        var new_json = {
                parsed: parsed,
                plain: new_json_contents,
                last_modified: mtime
        };
        // Hand over a copy of the history; the diff builder consumes its list.
        create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
                // gzip the new version (non-delta), and put it into place.
                zlib.gzip(new_json_contents, function(err, buffer) {
                        if (err) throw err;

                        new_json.gzip = buffer;
                        json = new_json;
                        diff_json = new_diff_json;
                        json_lock = 0;

                        // Finally, wake up any sleeping clients.
                        possibly_wakeup_clients();
                });
        });
}
104
// Builds one gzipped delta per entry in history_left, each going from
// that historic version to new_json, and stores it in new_diff_json under
// the historic version's last_modified. Entries are handled one at a time,
// oldest first, and are removed from history_left as they are processed.
// Once the list is empty, cb is called with new_diff_json.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
        var nothing_left = (history_left.length === 0);
        if (nothing_left) {
                cb(new_diff_json);
                return;
        }

        var older = history_left.shift();
        var changes = delta.JSON_delta.diff(older.parsed, new_json.parsed);
        var serialized = JSON.stringify(changes);

        var store_and_continue = function(err, compressed) {
                if (err) throw err;
                var entry = {};
                entry.parsed = changes;
                entry.plain = serialized;
                entry.gzip = compressed;
                // The delta brings the client up to the new version's timestamp.
                entry.last_modified = new_json.last_modified;
                new_diff_json[older.last_modified] = entry;
                create_json_historic_diff(new_json, history_left, new_diff_json, cb);
        };
        zlib.gzip(serialized, store_and_continue);
}
125
// fs.watch callback (also called directly for the initial read): re-reads
// json_filename and hands its contents and mtime to replace_json().
// Events for other files are ignored. If an update is already being
// processed, at most one re-read is queued and retried every 100 ms.
// Every re-read also restarts the 30 second touch timer.
var reread_file = function(event, filename) {
        if (filename != path.basename(json_filename)) {
                return;
        }
        if (json_lock >= 2) {
                // One re-read is already queued; that one will pick up this change.
                return;
        }
        if (json_lock == 1) {
                // Already processing; wait a bit.
                json_lock = 2;
                setTimeout(function() {
                        // Only step back from "queued" to "processing" if the update
                        // we were waiting for is still running. If it has finished in
                        // the meantime the lock is already 0 and must stay that way;
                        // forcing it to 1 would make every retry see a held lock and
                        // re-queue itself forever without ever reading the file.
                        if (json_lock == 2) {
                                json_lock = 1;
                        }
                        reread_file(event, filename);
                }, 100);
                return;
        }
        json_lock = 1;

        console.log("Rereading " + json_filename);
        fs.open(json_filename, 'r', function(err, fd) {
                if (err) throw err;
                fs.fstat(fd, function(err, st) {
                        if (err) throw err;
                        // Make room for the whole file, but never less than the
                        // 1 MiB we have always read, so that larger files are not
                        // cut off in the middle.
                        var buffer_size = Math.max(st.size, 1048576);
                        var buffer = Buffer.alloc(buffer_size);
                        fs.read(fd, buffer, 0, buffer_size, 0, function(err, bytesRead, buffer) {
                                if (err) throw err;
                                fs.close(fd, function() {
                                        var new_json_contents = buffer.toString('utf8', 0, bytesRead);
                                        replace_json(new_json_contents, st.mtime.getTime());
                                });
                        });
                });
        });

        if (touch_timer !== undefined) {
                clearTimeout(touch_timer);
        }
        touch_timer = setTimeout(function() {
                console.log("Touching analysis.json due to no other activity");
                var now = Date.now() / 1000;
                // fs.utimes requires a completion callback; use it to report failures.
                fs.utimes(json_filename, now, now, function(err) {
                        if (err) {
                                console.log("Could not touch " + json_filename + ": " + err.message);
                        }
                });
        }, 30000);
}
// Sends the current data to every client on the wait list, marking each
// of them as recently seen, and then empties the wait list. The viewer
// count is computed once, up front, and is the same for all of them.
var possibly_wakeup_clients = function() {
        var viewers = count_viewers();
        Object.keys(sleeping_clients).forEach(function(id) {
                var waiter = sleeping_clients[id];
                mark_recently_seen(waiter.unique);
                send_json(waiter.response, waiter.ims, waiter.accept_gzip, viewers);
        });
        sleeping_clients = {};
}
// Answers the given response with a fixed plain-text 404 page.
var send_404 = function(response) {
        var headers = {};
        headers['Content-Type'] = 'text/plain';
        var body = 'Something went wrong. Sorry.';

        response.writeHead(404, headers);
        response.write(body);
        response.end();
}
// Writes a complete 200 response to the given response object.
//
//   response:    the HTTP response to write to.
//   ims:         the version timestamp the client says it already has. If we
//                have a diff from exactly that version, the diff is sent;
//                otherwise the full current JSON is sent.
//   accept_gzip: whether to send the gzipped representation.
//   num_viewers: viewer count, passed on in the X-RGNV header.
var send_json = function(response, ims, accept_gzip, num_viewers) {
        // ims comes straight from the query string, so only honour it if it
        // names a diff we have actually built. A plain diff_json[ims] lookup
        // would also find inherited properties such as "constructor".
        var have_diff = Object.prototype.hasOwnProperty.call(diff_json, ims);
        var this_json = have_diff ? diff_json[ims] : json;

        var headers = {
                'Content-Type': 'text/json',
                'X-RGLM': this_json.last_modified,
                'X-RGNV': num_viewers,
                'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
                'Vary': 'Accept-Encoding',
        };

        if (MINIMUM_VERSION) {
                headers['X-RGMV'] = MINIMUM_VERSION;
        }

        if (accept_gzip) {
                headers['Content-Length'] = this_json.gzip.length;
                headers['Content-Encoding'] = 'gzip';
                response.writeHead(200, headers);
                response.write(this_json.gzip);
        } else {
                // Content-Length counts bytes, and the string is written out as
                // UTF-8, so its length in characters is too small as soon as it
                // contains anything outside ASCII.
                headers['Content-Length'] = Buffer.byteLength(this_json.plain, 'utf8');
                response.writeHead(200, headers);
                response.write(this_json.plain);
        }
        response.end();
}
// Records the current time as the moment the client with the given unique
// ID was last seen. Requests without a unique ID are not tracked.
var mark_recently_seen = function(unique) {
        if (!unique) {
                return;
        }
        last_seen_clients[unique] = Date.now();
}
// Returns the number of current viewers.
//
// If viewer_count_override is set, that value is returned as is. Otherwise
// every unique ID seen during the last five seconds counts as one viewer,
// as does every other unique ID that currently has a request on the wait
// list. As a side effect, entries older than five seconds are dropped
// from last_seen_clients.
var count_viewers = function() {
        if (viewer_count_override !== undefined) {
                return viewer_count_override;
        }

        var now = (new Date).getTime();
        var has_own = Object.prototype.hasOwnProperty;

        // Go through and remove old viewers, and count them at the same time.
        var new_last_seen_clients = {};
        var num_viewers = 0;
        for (var unique in last_seen_clients) {
                if (now - last_seen_clients[unique] < 5000) {
                        ++num_viewers;
                        new_last_seen_clients[unique] = last_seen_clients[unique];
                }
        }

        // Also add sleeping clients that we would otherwise assume timed out.
        // Each unique ID counts only once, no matter how many of its requests
        // are sleeping. The set has no prototype, and the recently-seen check
        // looks at own properties only, so IDs that happen to match inherited
        // property names are handled like any other ID.
        var counted_sleepers = Object.create(null);
        for (var id in sleeping_clients) {
                var sleeper = sleeping_clients[id].unique;
                if (!sleeper) {
                        continue;
                }
                if (has_own.call(new_last_seen_clients, sleeper) || sleeper in counted_sleepers) {
                        continue;
                }
                counted_sleepers[sleeper] = true;
                ++num_viewers;
        }

        last_seen_clients = new_last_seen_clients;
        return num_viewers;
}
// Prints str to the console, prefixed with the current time in seconds
// (three decimals) in square brackets.
var log = function(str) {
        var seconds = (Date.now() * 1e-3).toFixed(3);
        var prefix = "[" + seconds + "] ";
        console.log(prefix + str);
}
247
// Watch the directory holding the file, passing every change event on to
// reread_file (which ignores events for other files). Then read the file
// once right away, so that we have a copy without waiting for a change.
fs.watch(path.dirname(json_filename), function(event, filename) {
        reread_file(event, filename);
});
reread_file(null, path.basename(json_filename));
252
// Viewer counting from the Varnish log: run varnishncsa, remember when
// each unique client ID was last seen in its output, and once per second
// expire clients that have gone away and publish the number of remaining
// ones through viewer_count_override.
if (COUNT_FROM_VARNISH_LOG) {
        // Note: We abuse serve_url as a regex.
        var varnishncsa = child_process.spawn(
                'varnishncsa', ['-F', '%{%s}t %U %q tffb=%{Varnish:time_firstbyte}x',
                '-q', 'ReqURL ~ "^' + serve_url + '"']);
        var rl = readline.createInterface({
                input: varnishncsa.stdout,
                output: varnishncsa.stdin,
                terminal: false
        });

        // Per unique client ID: when it was last seen (in milliseconds) and
        // its grace timestamp. A plain object, since it is keyed by strings.
        var uniques = {};
        rl.on('line', function(line) {
                var v = line.match(/(\d+) .*\?ims=\d+&unique=(.*) tffb=(.*)/);
                if (v) {
                        uniques[v[2]] = {
                                // Request timestamp plus time to first byte, in milliseconds.
                                last_seen: (parseInt(v[1], 10) + parseFloat(v[3])) * 1e3,
                                grace: null,
                        };
                        log(v[1] + " " + v[2] + " " + v[3]);
                } else {
                        log("VARNISHNCSA UNPARSEABLE LINE: " + line);
                }
        });
        setInterval(function() {
                if (json === undefined) {
                        // Nothing has been loaded yet, so there is no mtime to
                        // compare against; try again on the next tick.
                        return;
                }

                var mtime = json.last_modified - 1000;  // Compensate for subsecond issues.
                var now = (new Date).getTime();
                var num_viewers = 0;

                for (var unique in uniques) {
                        ++num_viewers;
                        var last_seen = uniques[unique].last_seen;
                        if (now - last_seen <= 5000) {
                                // We've seen this user in the last five seconds;
                                // it's okay.
                                continue;
                        }
                        if (last_seen >= mtime) {
                                // This user has the latest version;
                                // they are probably just hanging.
                                continue;
                        }
                        if (uniques[unique].grace === null) {
                                // They have five seconds after a new JSON has been
                                // provided to get it, or they're out.
                                // We don't simply use mtime, since we don't want to
                                // reset the grace timer just because a new JSON is
                                // published.
                                uniques[unique].grace = mtime;
                        }
                        if (now - uniques[unique].grace > 5000) {
                                log("Timing out " + unique + " (last_seen=" + last_seen + ", now=" + now +
                                        ", mtime=" + mtime + ", grace=" + uniques[unique].grace + ")");
                                delete uniques[unique];
                                --num_viewers;
                        }
                }

                log(num_viewers + " entries in hash, mtime=" + mtime);
                viewer_count_override = num_viewers;
        }, 1000);
}
315
// The HTTP server. A request for serve_url is answered right away if we
// hold something newer than the version the client reports through the
// "ims" query parameter (or if it reports none); otherwise the client is
// parked on the wait list until new data shows up. Any other path gets
// a 404.
var server = http.createServer();
server.on('request', function(request, response) {
        var parsed_url = url.parse(request.url, true);
        var query = parsed_url.query;
        var ims = query['ims'];
        var unique = query['unique'];

        log(request.url);
        if (parsed_url.pathname !== serve_url) {
                // This is not the request you are looking for.
                send_404(response);
                return;
        }

        mark_recently_seen(unique);

        var accept_encoding = request.headers['accept-encoding'];
        var accept_gzip = accept_encoding !== undefined &&
                Boolean(accept_encoding.match(/\bgzip\b/));

        // If what we hold is already newer than what the user has,
        // send it out straight away and be done with it.
        var have_newer = json !== undefined && (!ims || json.last_modified > ims);
        if (have_newer) {
                send_json(response, ims, accept_gzip, count_viewers());
                return;
        }

        // Otherwise the request has to hang until something newer arrives,
        // so put the user on the wait list.
        var client = {
                response: response,
                request_id: request_id,
                accept_gzip: accept_gzip,
                unique: unique,
                ims: ims
        };
        sleeping_clients[request_id] = client;
        request_id++;

        // Remember the client on the socket, so that it can be found again
        // from the socket's event handlers.
        request.socket.client = client;
});
// When a socket closes, take the client that was parked on it (if any)
// off the wait list, and note that it was seen just now.
server.on('connection', function(socket) {
        socket.on('close', function() {
                var waiter = socket.client;
                if (!waiter) {
                        return;
                }
                mark_recently_seen(waiter.unique);
                delete sleeping_clients[waiter.request_id];
        });
});

// Start accepting connections.
server.listen(port);