]> git.sesse.net Git - remoteglot/blob - server/serve-analysis.js
Small refactoring in serve-analysis.js.
[remoteglot] / server / serve-analysis.js
1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
3
4 // Modules.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var readline = require('readline');
12 var child_process = require('child_process');
13 var delta = require('../www/js/json_delta.js');
14 var hash_lookup = require('./hash-lookup.js');
15
// Constants.
var HISTORY_TO_KEEP = 5;
var MINIMUM_VERSION = null;
var COUNT_FROM_VARNISH_LOG = true;

// Command line:
//   serve-analysis.js [json_filename [serve_url [hash_serve_url [port [grpc_backends]]]]]
// All arguments are optional; defaults below.

// Filename to serve.
var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
if (process.argv.length >= 3) {
	json_filename = process.argv[2];
}

// Expected destination filenames.
var serve_url = '/analysis.pl';
var hash_serve_url = '/hash';
if (process.argv.length >= 4) {
	serve_url = process.argv[3];
}
if (process.argv.length >= 5) {
	hash_serve_url = process.argv[4];
}

// TCP port to listen on.
var port = 5000;
if (process.argv.length >= 6) {
	port = parseInt(process.argv[5]);
}

// gRPC backends, given as a comma-separated host:port list.
var grpc_backends = ["localhost:50051", "localhost:50052"];
if (process.argv.length >= 7) {
	grpc_backends = process.argv[6].split(",");
}
hash_lookup.init(grpc_backends);
49
// If set to 1, we are already processing a JSON update and should not
// start a new one. If set to 2, we are _also_ having one in the queue.
// (See reread_file for the locking protocol; replace_json resets it to 0.)
var json_lock = 0;

// The current contents of the file to hand out, and its last modified time.
// Once loaded, has the shape { parsed, plain, gzip, last_modified }
// built by replace_json; undefined until the first successful read.
var json = undefined;

// The last five timestamps, and diffs from them to the latest version.
// historic_json holds previous values of `json`; diff_json maps a historic
// last_modified timestamp to a { parsed, plain, gzip, last_modified } delta.
var historic_json = [];
var diff_json = {};

// The list of clients that are waiting for new data to show up.
// Uniquely keyed by request_id so that we can take them out of
// the queue if they close the socket.
var sleeping_clients = {};
var request_id = 0;

// List of when clients were last seen (ms since epoch), keyed by their
// unique ID. Used to show a viewer count to the user.
var last_seen_clients = {};

// The timer used to touch the file every 30 seconds if nobody
// else does it for us. This makes sure we don't have clients
// hanging indefinitely (which might have them return errors).
var touch_timer = undefined;

// If we are behind Varnish, we can't count the number of clients
// ourselves, so we need to get it from parsing varnishncsa.
var viewer_count_override = undefined;
79
// Install freshly read JSON contents as the version we serve.
// Retires the previous version into the diff history, strips and acts on
// the "internal" section, computes deltas, gzips the payload, and finally
// releases json_lock and wakes all long-polling clients.
var replace_json = function(new_json_contents, mtime) {
	// Retire the previous version into the history we diff against.
	if (json !== undefined) {
		// If two versions have the same mtime, clients could have either.
		// Note the fact, so that we never insert it.
		if (json.last_modified == mtime) {
			json.invalid_base = true;
		}
		if (!json.invalid_base) {
			historic_json.push(json);
			while (historic_json.length > HISTORY_TO_KEEP) {
				historic_json.shift();
			}
		}
	}

	var parsed = JSON.parse(new_json_contents);

	// The "internal" section is for our own consumption only;
	// act on it, then strip it out before serving.
	var internal = parsed['internal'];
	if (internal) {
		var backends = internal['grpc_backends'];
		if (backends && hash_lookup.need_reinit(backends)) {
			hash_lookup.init(backends);
		}
		delete parsed['internal'];
		new_json_contents = JSON.stringify(parsed);
	}

	var new_json = {
		parsed: parsed,
		plain: new_json_contents,
		last_modified: mtime
	};
	create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
		// gzip the new version (non-delta), and put it into place.
		zlib.gzip(new_json_contents, function(err, gzipped) {
			if (err) throw err;

			new_json.gzip = gzipped;
			json = new_json;
			diff_json = new_diff_json;
			json_lock = 0;

			// Finally, wake up any sleeping clients.
			possibly_wakeup_clients();
		});
	});
}
127
// Asynchronously (by recursing through callbacks) compute deltas from each
// historic version in history_left to new_json, storing each gzipped delta
// into new_diff_json keyed by the historic version's last_modified time.
// Calls cb(new_diff_json) once history_left is exhausted.
// Consumes history_left; callers should pass a copy.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
	if (history_left.length == 0) {
		cb(new_diff_json);
		return;
	}

	var histobj = history_left.shift();
	var diff = delta.JSON_delta.diff(histobj.parsed, new_json.parsed);
	var diff_text = JSON.stringify(diff);

	// Verify that the delta is correct: patching the historic version
	// must reproduce the new version's PV.
	var base = JSON.parse(histobj.plain);
	delta.JSON_delta.patch(base, diff);
	var patched_pv = JSON.stringify(base['pv']);
	var expected_pv = JSON.stringify(new_json.parsed['pv']);
	if (patched_pv !== expected_pv) {
		console.log("Patch went wrong:", histobj.plain, new_json.plain);
		// Fixed: this called the undefined exit(), which would have thrown
		// a ReferenceError instead of terminating cleanly.
		process.exit(1);
	}

	zlib.gzip(diff_text, function(err, buffer) {
		if (err) throw err;
		new_diff_json[histobj.last_modified] = {
			parsed: diff,
			plain: diff_text,
			gzip: buffer,
			last_modified: new_json.last_modified,
		};
		create_json_historic_diff(new_json, history_left, new_diff_json, cb);
	});
}
159
// Read the entire contents of <filename> asynchronously, then call
// callback(contents, mtime) where contents is a UTF-8 string and mtime is
// the file's last-modified time in milliseconds since the epoch.
// I/O errors are thrown (crashing the process), matching the file's style.
function read_entire_file(filename, callback) {
	fs.open(filename, 'r', function(err, fd) {
		if (err) throw err;
		fs.fstat(fd, function(err, st) {
			if (err) throw err;
			// Size the buffer from fstat() instead of a fixed 1 MB,
			// so larger files are no longer silently truncated.
			// Buffer.alloc replaces the deprecated, unsafe new Buffer().
			var buffer = Buffer.alloc(st.size);
			fs.read(fd, buffer, 0, st.size, 0, function(err, bytesRead, buffer) {
				if (err) throw err;
				fs.close(fd, function() {
					var contents = buffer.toString('utf8', 0, bytesRead);
					callback(contents, st.mtime.getTime());
				});
			});
		});
	});
}
176
// Callback for fs.watch() on the directory containing the JSON file.
// Rereads the file and puts it into service, guarded by json_lock so that
// at most one reread is in flight and at most one more is queued behind it.
var reread_file = function(event, filename) {
	// The directory watcher fires for every file; only react to ours.
	if (filename != path.basename(json_filename)) {
		return;
	}
	if (json_lock >= 2) {
		// Already processing, and a retry is queued; nothing more to do.
		return;
	}
	if (json_lock == 1) {
		// Already processing; wait a bit.
		json_lock = 2;
		setTimeout(function() { if (json_lock == 2) json_lock = 1; reread_file(event, filename); }, 100);
		return;
	}
	json_lock = 1;

	console.log("Rereading " + json_filename);
	read_entire_file(json_filename, function(new_json_contents, mtime) {
		// replace_json resets json_lock to 0 when it is done.
		replace_json(new_json_contents, mtime);
	});

	// (Re)arm the idle timer: if nothing touches the file for 30 seconds,
	// touch it ourselves so long-polling clients get woken up.
	if (touch_timer !== undefined) {
		clearTimeout(touch_timer);
	}
	touch_timer = setTimeout(function() {
		console.log("Touching analysis.json due to no other activity");
		var now = Date.now() / 1000;
		fs.utimes(json_filename, now, now, function() {});
	}, 30000);
}
// Send the current JSON to every client waiting for a new version,
// marking each as recently seen, then empty the wait list.
var possibly_wakeup_clients = function() {
	var num_viewers = count_viewers();
	for (var key in sleeping_clients) {
		var client = sleeping_clients[key];
		mark_recently_seen(client.unique);
		send_json(client.response, client.ims, client.accept_gzip, num_viewers);
	}
	sleeping_clients = {};
}
// Reply with a plain-text 404 for requests to unknown URLs.
var send_404 = function(response) {
	var headers = { 'Content-Type': 'text/plain' };
	response.writeHead(404, headers);
	response.write('Something went wrong. Sorry.');
	response.end();
}
// Send a response to one client: a delta if we have one matching the
// version the client reports having (ims), otherwise the full current JSON.
// Sends the pre-gzipped variant when the client accepts gzip.
var send_json = function(response, ims, accept_gzip, num_viewers) {
	var this_json = diff_json[ims] || json;

	var headers = {
		'Content-Type': 'text/json',
		'X-RGLM': this_json.last_modified,
		'X-RGNV': num_viewers,
		'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
		'Vary': 'Accept-Encoding',
	};
	if (MINIMUM_VERSION) {
		headers['X-RGMV'] = MINIMUM_VERSION;
	}

	// Pick the pre-compressed or plain body; both are kept ready-made.
	var body = accept_gzip ? this_json.gzip : this_json.plain;
	if (accept_gzip) {
		headers['Content-Encoding'] = 'gzip';
	}
	headers['Content-Length'] = body.length;
	response.writeHead(200, headers);
	response.write(body);
	response.end();
}
// Record that the client with the given unique ID was active just now.
// Clients without a unique ID are not tracked.
var mark_recently_seen = function(unique) {
	if (!unique) {
		return;
	}
	last_seen_clients[unique] = (new Date).getTime();
}
// Count the viewers currently considered active: everyone seen within the
// last five seconds, plus sleeping (long-polling) clients not already
// counted. Expires stale entries from last_seen_clients as a side effect.
// If viewer_count_override is set (Varnish mode), just return that.
var count_viewers = function() {
	if (viewer_count_override !== undefined) {
		return viewer_count_override;
	}

	var now = (new Date).getTime();

	// Keep only viewers seen within the last five seconds, counting them
	// as we go.
	var still_alive = {};
	var num_viewers = 0;
	for (var unique in last_seen_clients) {
		var last_seen = last_seen_clients[unique];
		if (now - last_seen < 5000) {
			++num_viewers;
			still_alive[unique] = last_seen;
		}
	}

	// Sleeping clients are still connected even if their last request is
	// old; add those we have not already counted.
	for (var key in sleeping_clients) {
		var sleeper = sleeping_clients[key].unique;
		if (sleeper && !(sleeper in still_alive)) {
			++num_viewers;
		}
	}

	last_seen_clients = still_alive;
	return num_viewers;
}
// Log a message prefixed with the current UNIX time in seconds,
// with millisecond resolution.
var log = function(str) {
	var timestamp = ((new Date).getTime() * 1e-3).toFixed(3);
	console.log("[" + timestamp + "] " + str);
}
287
// Set up a watcher to catch changes to the file, then do an initial read
// to make sure we have a copy.
// (We watch the containing directory rather than the file itself;
// reread_file filters events by basename.)
fs.watch(path.dirname(json_filename), reread_file);
reread_file(null, path.basename(json_filename));
292
if (COUNT_FROM_VARNISH_LOG) {
	// Behind Varnish we cannot see the clients ourselves, so tail
	// varnishncsa output for requests to serve_url and count uniques.
	// Note: We abuse serve_url as a regex.
	var varnishncsa = child_process.spawn(
		'varnishncsa', ['-F', '%{%s}t %U %q tffb=%{Varnish:time_firstbyte}x',
		'-q', 'ReqURL ~ "^' + serve_url + '"']);
	var rl = readline.createInterface({
		input: varnishncsa.stdout,
		output: varnishncsa.stdin,
		terminal: false
	});

	// Map from unique client ID to { last_seen (ms), grace }.
	// (Was declared as an Array, but it is used as a string-keyed map.)
	var uniques = {};
	rl.on('line', function(line) {
		var v = line.match(/(\d+) .*\?ims=\d+&unique=(.*) tffb=(.*)/);
		if (v) {
			uniques[v[2]] = {
				last_seen: (parseInt(v[1]) + parseFloat(v[3])) * 1e3,
				grace: null,
			};
			log(v[1] + " " + v[2] + " " + v[3]);
		} else {
			log("VARNISHNCSA UNPARSEABLE LINE: " + line);
		}
	});
	setInterval(function() {
		if (json === undefined) {
			// No JSON has been loaded yet, so we have no mtime to
			// compare against; try again on the next tick.
			return;
		}
		var mtime = json.last_modified - 1000;  // Compensate for subsecond issues.
		var now = (new Date).getTime();
		var num_viewers = 0;

		for (var unique in uniques) {
			++num_viewers;
			var last_seen = uniques[unique].last_seen;
			if (now - last_seen <= 5000) {
				// We've seen this user in the last five seconds;
				// it's okay.
				continue;
			}
			if (last_seen >= mtime) {
				// This user has the latest version;
				// they are probably just hanging.
				continue;
			}
			if (uniques[unique].grace === null) {
				// They have five seconds after a new JSON has been
				// provided to get it, or they're out.
				// We don't simply use mtime, since we don't want to
				// reset the grace timer just because a new JSON is
				// published.
				uniques[unique].grace = mtime;
			}
			if (now - uniques[unique].grace > 5000) {
				// Fixed: ", grace=…" used to be passed as a second
				// argument to log() (stray comma instead of +),
				// so it was silently dropped from the message.
				log("Timing out " + unique + " (last_seen=" + last_seen + ", now=" + now +
					", mtime=" + mtime + ", grace=" + uniques[unique].grace + ")");
				delete uniques[unique];
				--num_viewers;
			}
		}

		log(num_viewers + " entries in hash, mtime=" + mtime);
		viewer_count_override = num_viewers;
	}, 1000);
}
355
// The HTTP frontend: serves hash probes on hash_serve_url and the analysis
// JSON on serve_url, long-polling clients that are already up to date.
var server = http.createServer();
server.on('request', function(request, response) {
	var u = url.parse(request.url, true);
	var ims = (u.query)['ims'];        // Timestamp of the version the client already has, if any.
	var unique = (u.query)['unique'];  // The client's unique ID, for viewer counting.

	log(request.url);
	if (u.pathname === hash_serve_url) {
		// Hash probes are delegated to the gRPC backends.
		var fen = (u.query)['fen'];
		hash_lookup.handle_request(fen, response);
		return;
	}
	if (u.pathname !== serve_url) {
		// This is not the request you are looking for.
		send_404(response);
		return;
	}

	mark_recently_seen(unique);

	var accept_encoding = request.headers['accept-encoding'];
	var accept_gzip;
	if (accept_encoding !== undefined && accept_encoding.match(/\bgzip\b/)) {
		accept_gzip = true;
	} else {
		accept_gzip = false;
	}

	// If we already have something newer than what the user has,
	// just send it out and be done with it.
	// (ims is a string here; the > comparison coerces it to a number.)
	if (json !== undefined && (!ims || json.last_modified > ims)) {
		send_json(response, ims, accept_gzip, count_viewers());
		return;
	}

	// OK, so we need to hang until we have something newer.
	// Put the user on the wait list.
	var client = {};
	client.response = response;
	client.request_id = request_id;
	client.accept_gzip = accept_gzip;
	client.unique = unique;
	client.ims = ims;
	sleeping_clients[request_id++] = client;

	// Remember the client on the socket so the close handler below can
	// remove it from the wait list if the connection drops.
	request.socket.client = client;
});
server.on('connection', function(socket) {
	socket.on('close', function() {
		// If this socket had a client on the wait list, take it off.
		var client = socket.client;
		if (client) {
			mark_recently_seen(client.unique);
			delete sleeping_clients[client.request_id];
		}
	});
});

server.listen(port);