]> git.sesse.net Git - remoteglot/blob - server/serve-analysis.js
Handle streaming PGNs, like from Lichess (although this might break non-streaming...
[remoteglot] / server / serve-analysis.js
// node.js version of analysis.pl; hopefully scales a bit better
// for this specific kind of task.

// Modules.
var http = require('http');
var fs = require('fs');
var url = require('url');
var querystring = require('querystring');
var path = require('path');
var zlib = require('zlib');
var readline = require('readline');
var child_process = require('child_process');
var delta = require('../www/js/json_delta.js');
var hash_lookup = require('./hash-lookup.js');

// Constants.
var HISTORY_TO_KEEP = 5;
var MINIMUM_VERSION = null;
var COUNT_FROM_VARNISH_LOG = true;

// Filename to serve.
var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
if (process.argv.length >= 3) {
	json_filename = process.argv[2];
}
var html_filename = '/srv/analysis.sesse.net/www/index.html';

// Expected destination filenames.
var serve_url = '/analysis.pl';
var html_serve_url = '/index-inline.html';
var hash_serve_url = '/hash';
if (process.argv.length >= 4) {
	serve_url = process.argv[3];
}
if (process.argv.length >= 5) {
	hash_serve_url = process.argv[4];
}

// TCP port to listen on.
var port = 5000;
if (process.argv.length >= 6) {
	// Always pass an explicit radix to parseInt(); without it, strings
	// like "0x50" would be parsed in a surprising base.
	port = parseInt(process.argv[5], 10);
}

// gRPC backends.
var grpc_backends = ["localhost:50051", "localhost:50052"];
if (process.argv.length >= 7) {
	grpc_backends = process.argv[6].split(",");
}
hash_lookup.init(grpc_backends);
51
// If set to 1, we are already processing a JSON update and should not
// start a new one. If set to 2, we are _also_ having one in the queue.
// (See reread_file() for the full locking protocol.)
var json_lock = 0;

// The current contents of the file to hand out, and its last modified time.
// json is of the form { parsed, plain, gzip, last_modified, invalid_base? }
// (built by replace_json); html is { plain, gzip } (built by reread_file).
var json = undefined;
var html = undefined;

// The last five timestamps, and diffs from them to the latest version.
// historic_json holds previous `json` objects; diff_json is keyed by the
// historic version's last_modified timestamp.
var historic_json = [];
var diff_json = {};

// The list of clients that are waiting for new data to show up.
// Uniquely keyed by request_id so that we can take them out of
// the queue if they close the socket.
var sleeping_clients = {};
var request_id = 0;

// List of when clients were last seen (ms since epoch), keyed by their
// unique ID. Used to show a viewer count to the user.
var last_seen_clients = {};

// The timer used to touch the file every 30 seconds if nobody
// else does it for us. This makes sure we don't have clients
// hanging indefinitely (which might have them return errors).
var touch_timer = undefined;

// If we are behind Varnish, we can't count the number of clients
// ourselves, so we need to get it from parsing varnishncsa.
var viewer_count_override = undefined;
// Install a freshly read analysis JSON as the current version: archive
// the previous version for delta generation, act on and strip the
// server-internal section, compute deltas and a gzipped copy, and
// finally wake up all long-polling clients. Releases json_lock when done.
var replace_json = function(new_json_contents, mtime) {
	// Archive the outgoing version so we can serve deltas against it.
	if (json !== undefined) {
		// If two versions have the same mtime, clients could have either.
		// Note the fact, so that we never insert it.
		if (json.last_modified == mtime) {
			json.invalid_base = true;
		}
		if (!json.invalid_base) {
			historic_json.push(json);
			if (historic_json.length > HISTORY_TO_KEEP) {
				historic_json.shift();
			}
		}
	}

	var parsed = JSON.parse(new_json_contents);

	// The 'internal' section is meant for this server only; possibly
	// re-point the gRPC backends, then strip it before anything is
	// handed out to clients.
	if (parsed['internal']) {
		var backends = parsed['internal']['grpc_backends'];
		if (backends && hash_lookup.need_reinit(backends)) {
			hash_lookup.init(backends);
		}
		delete parsed['internal'];
		new_json_contents = JSON.stringify(parsed);
	}

	var candidate = {
		parsed: parsed,
		plain: new_json_contents,
		last_modified: mtime
	};
	create_json_historic_diff(candidate, historic_json.slice(0), {}, function(new_diff_json) {
		// gzip the new version (non-delta), and put it into place.
		zlib.gzip(new_json_contents, function(err, buffer) {
			if (err) throw err;

			candidate.gzip = buffer;
			json = candidate;
			diff_json = new_diff_json;
			json_lock = 0;

			// Finally, wake up any sleeping clients.
			possibly_wakeup_clients();
		});
	});
}
130
// Asynchronously (recursing through gzip callbacks) compute a delta from
// each version in history_left to new_json, verify it, gzip it, and store
// it in new_diff_json keyed by the historic version's last_modified.
// Calls cb(new_diff_json) once the history is exhausted.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
	if (history_left.length == 0) {
		cb(new_diff_json);
		return;
	}

	var histobj = history_left.shift();
	var diff = delta.JSON_delta.diff(histobj.parsed, new_json.parsed);
	var diff_text = JSON.stringify(diff);

	// Verify that the delta is correct: patch the old version and check
	// that the 'pv' field round-trips to the new value.
	var base = JSON.parse(histobj.plain);
	delta.JSON_delta.patch(base, diff);
	var correct_pv = JSON.stringify(base['pv']);
	var wrong_pv = JSON.stringify(new_json.parsed['pv']);
	if (correct_pv !== wrong_pv) {
		console.log("Patch went wrong:", histobj.plain, new_json.plain);
		// Bug fix: exit() is not defined in node.js (it would throw a
		// ReferenceError); use process.exit() to shut down deliberately.
		process.exit(1);
	}

	zlib.gzip(diff_text, function(err, buffer) {
		if (err) throw err;
		new_diff_json[histobj.last_modified] = {
			parsed: diff,
			plain: diff_text,
			gzip: buffer,
			last_modified: new_json.last_modified,
		};
		create_json_historic_diff(new_json, history_left, new_diff_json, cb);
	});
}
162
// Read <filename> in its entirety and call callback(contents, mtime_ms),
// where contents is the file decoded as UTF-8 and mtime_ms is the file's
// modification time in milliseconds since the epoch.
// Any I/O error is fatal (thrown), matching the rest of this server.
function read_entire_file(filename, callback) {
	fs.open(filename, 'r', function(err, fd) {
		if (err) throw err;
		fs.fstat(fd, function(err, st) {
			if (err) throw err;
			// Size the buffer from the actual file size instead of a
			// fixed 1 MB cap, which silently truncated larger files.
			// Buffer.alloc() replaces the deprecated new Buffer(),
			// which handed back uninitialized memory.
			var buffer = Buffer.alloc(st.size);
			fs.read(fd, buffer, 0, st.size, 0, function(err, bytesRead, buffer) {
				if (err) throw err;
				fs.close(fd, function() {
					var contents = buffer.toString('utf8', 0, bytesRead);
					callback(contents, st.mtime.getTime());
				});
			});
		});
	});
}
179
// React to a change notification for the JSON file: reread it, swap in
// the new version (via replace_json), and re-render the inline HTML.
// json_lock serializes rereads: 1 = a reread is in flight, 2 = in flight
// with one more queued; replace_json resets it to 0 when done.
// Also (re)arms a 30-second timer that touches the file if nothing else
// does, so long-polling clients are never left hanging indefinitely.
var reread_file = function(event, filename) {
	// fs.watch() watches the whole directory; ignore other files.
	if (filename != path.basename(json_filename)) {
		return;
	}
	if (json_lock >= 2) {
		// One reread running AND one queued; this event adds nothing.
		return;
	}
	if (json_lock == 1) {
		// Already processing; wait a bit.
		json_lock = 2;
		setTimeout(function() { if (json_lock == 2) json_lock = 1; reread_file(event, filename); }, 100);
		return;
	}
	json_lock = 1;

	console.log("Rereading " + json_filename);
	read_entire_file(json_filename, function(new_json_contents, mtime) {
		replace_json(new_json_contents, mtime);

		// The HTML can go async, it's not too hopeless if it's out of date by a few milliseconds
		read_entire_file(html_filename, function(new_html_contents, html_mtime) {
			// Mirror the headers a regular JSON response would carry, so
			// the client-side code can treat inline and fetched data alike.
			var json_headers = {
				'X-RGLM': mtime,
				'X-RGNV': count_viewers(),  // May be slightly out of date.
				'Date': (new Date).toUTCString(),
			};
			if (MINIMUM_VERSION) {
				json_headers['X-RGMV'] = MINIMUM_VERSION;
			}
			let inline_json = {
				'data': JSON.parse(new_json_contents),
				'headers': json_headers,
			};
			// The internal section is server-only; never expose it.
			delete inline_json['data']['internal'];

			new_html_contents = new_html_contents.replace(
				'/*REPLACE:inlinejson*/',
				'window.inline_json=' + JSON.stringify(inline_json) + ';');
			zlib.gzip(new_html_contents, function(err, buffer) {
				if (err) throw err;
				html = {
					plain: new_html_contents,
					gzip: buffer,
				};
			});
		});
	});

	// Rearm the keep-alive touch timer; any real update resets it.
	if (touch_timer !== undefined) {
		clearTimeout(touch_timer);
	}
	touch_timer = setTimeout(function() {
		console.log("Touching analysis.json due to no other activity");
		var now = Date.now() / 1000;
		fs.utimes(json_filename, now, now, function() {});
	}, 30000);
}
// A new JSON version is in place: send it to every long-polling client
// on the wait list, then empty the list.
var possibly_wakeup_clients = function() {
	var num_viewers = count_viewers();
	for (var key in sleeping_clients) {
		var client = sleeping_clients[key];
		mark_recently_seen(client.unique);
		send_json(client.response, client.ims, client.accept_gzip, num_viewers);
	}
	sleeping_clients = {};
}
// Reply with a plain-text 404 for any URL we do not serve.
var send_404 = function(response) {
	var headers = { 'Content-Type': 'text/plain' };
	response.writeHead(404, headers);
	response.write('Something went wrong. Sorry.');
	response.end();
}
// Send the analysis JSON to one client. If we have a delta from the
// client's version (ims = its last-seen mtime), serve that; otherwise
// serve the full document. Precompressed gzip is used when accepted.
var send_json = function(response, ims, accept_gzip, num_viewers) {
	var this_json = diff_json[ims] || json;

	var headers = {
		'Content-Type': 'text/json',
		'X-RGLM': this_json.last_modified,
		'X-RGNV': num_viewers,
		'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
		'Vary': 'Accept-Encoding',
	};
	if (MINIMUM_VERSION) {
		headers['X-RGMV'] = MINIMUM_VERSION;
	}

	// Pick the precompressed or plain representation.
	var body = accept_gzip ? this_json.gzip : this_json.plain;
	headers['Content-Length'] = body.length;
	if (accept_gzip) {
		headers['Content-Encoding'] = 'gzip';
	}
	response.writeHead(200, headers);
	response.write(body);
	response.end();
}
// Send the inline-JSON HTML page to one client, gzipped if accepted.
// (num_viewers is currently unused but kept for signature parity with
// send_json.)
var send_html = function(response, accept_gzip, num_viewers) {
	var variant = accept_gzip ? html.gzip : html.plain;
	var headers = {
		'Content-type': 'text/html; charset=utf-8',
		'Vary': 'Accept-Encoding',
	};
	headers['Content-Length'] = variant.length;
	if (accept_gzip) {
		headers['Content-Encoding'] = 'gzip';
	}
	response.writeHead(200, headers);
	response.write(variant);
	response.end();
}
// Record that client <unique> was active just now, for viewer counting.
// Clients that did not supply a unique ID are ignored.
var mark_recently_seen = function(unique) {
	if (!unique) {
		return;
	}
	last_seen_clients[unique] = Date.now();
}
// Estimate the current number of viewers. If Varnish-based counting is
// active, its number wins; otherwise count clients seen within the last
// five seconds (pruning older entries as we go) plus sleeping
// long-pollers that the timeout would otherwise have dropped.
var count_viewers = function() {
	if (viewer_count_override !== undefined) {
		return viewer_count_override;
	}

	var now = (new Date).getTime();

	// Prune stale viewers while counting the live ones.
	var still_alive = {};
	var num_viewers = 0;
	for (var unique in last_seen_clients) {
		var ts = last_seen_clients[unique];
		if (now - ts < 5000) {
			++num_viewers;
			still_alive[unique] = ts;
		}
	}

	// Sleeping clients are idle by design; count any of them that the
	// five-second cutoff above did not already include.
	for (var key in sleeping_clients) {
		var sleeper = sleeping_clients[key].unique;
		if (sleeper && !(sleeper in still_alive)) {
			++num_viewers;
		}
	}

	last_seen_clients = still_alive;
	return num_viewers;
}
// Log <str> prefixed with the current wall-clock time in seconds,
// printed with millisecond resolution.
var log = function(str) {
	var stamp = (Date.now() * 1e-3).toFixed(3);
	console.log("[" + stamp + "] " + str);
}
336
// Set up a watcher to catch changes to the file, then do an initial read
// to make sure we have a copy. We watch the containing directory rather
// than the file itself (presumably so rename-into-place updates are
// caught -- reread_file filters on the basename); verify before changing.
fs.watch(path.dirname(json_filename), reread_file);
reread_file(null, path.basename(json_filename));
341
if (COUNT_FROM_VARNISH_LOG) {
	// Note: We abuse serve_url as a regex.
	var varnishncsa = child_process.spawn(
		'varnishncsa', ['-F', '%{%s}t %U %q tffb=%{Varnish:time_firstbyte}x',
		'-q', 'ReqURL ~ "^(' + serve_url + '|' + html_serve_url + ')"']);
	var rl = readline.createInterface({
		input: varnishncsa.stdout,
		output: varnishncsa.stdin,
		terminal: false
	});

	// Map from client unique ID to { last_seen (ms), grace (ms or null) }.
	// Bug fix: this is a string-keyed map, so use an object, not an array.
	var uniques = {};
	rl.on('line', function(line) {
		var v = line.match(/(\d+) .*\?ims=\d+&unique=(.*) tffb=(.*)/);
		if (v) {
			uniques[v[2]] = {
				last_seen: (parseInt(v[1], 10) + parseFloat(v[3])) * 1e3,
				grace: null,
			};
			log(v[1] + " " + v[2] + " " + v[3]);
		} else {
			log("VARNISHNCSA UNPARSEABLE LINE: " + line);
		}
	});
	// Once a second, time out clients that have neither polled recently
	// nor hold the latest JSON, and publish the count.
	setInterval(function() {
		var mtime = json.last_modified - 1000;  // Compensate for subsecond issues.
		var now = (new Date).getTime();
		var num_viewers = 0;

		for (var unique in uniques) {
			++num_viewers;
			var last_seen = uniques[unique].last_seen;
			if (now - last_seen <= 5000) {
				// We've seen this user in the last five seconds;
				// it's okay.
				continue;
			}
			if (last_seen >= mtime) {
				// This user has the latest version;
				// they are probably just hanging.
				continue;
			}
			if (uniques[unique].grace === null) {
				// They have five seconds after a new JSON has been
				// provided to get it, or they're out.
				// We don't simply use mtime, since we don't want to
				// reset the grace timer just because a new JSON is
				// published.
				uniques[unique].grace = mtime;
			}
			if (now - uniques[unique].grace > 5000) {
				// Bug fix: the grace part used to be passed as a second
				// (discarded) argument to log() due to a stray comma,
				// so it was never printed; concatenate with + instead.
				log("Timing out " + unique + " (last_seen=" + last_seen + ", now=" + now +
					", mtime=" + mtime + ", grace=" + uniques[unique].grace + ")");
				delete uniques[unique];
				--num_viewers;
			}
		}

		log(num_viewers + " entries in hash, mtime=" + mtime);
		viewer_count_override = num_viewers;
	}, 1000);
}
404
var server = http.createServer();
// Main dispatcher: hash-probe lookups, the inline-JSON HTML page, and
// the long-polling JSON endpoint. Query parameters: ims = the mtime of
// the version the client already has, unique = its viewer ID.
server.on('request', function(request, response) {
	var u = url.parse(request.url, true);
	var ims = (u.query)['ims'];
	var unique = (u.query)['unique'];

	log(request.url);
	if (u.pathname === hash_serve_url) {
		// Hash lookups are delegated entirely to the gRPC backend glue.
		var fen = (u.query)['fen'];
		hash_lookup.handle_request(fen, response);
		return;
	}
	if (u.pathname !== serve_url && u.pathname !== html_serve_url) {
		// This is not the request you are looking for.
		send_404(response);
		return;
	}

	var accept_encoding = request.headers['accept-encoding'];
	let accept_gzip = (accept_encoding !== undefined && accept_encoding.match(/\bgzip\b/));

	if (u.pathname === html_serve_url) {
		send_html(response, accept_gzip, count_viewers());
		return;
	}

	mark_recently_seen(unique);

	// If we already have something newer than what the user has,
	// just send it out and be done with it.
	if (json !== undefined && (!ims || json.last_modified > ims)) {
		send_json(response, ims, accept_gzip, count_viewers());
		return;
	}

	// OK, so we need to hang until we have something newer.
	// Put the user on the wait list.
	var client = {};
	client.response = response;
	client.request_id = request_id;
	client.accept_gzip = accept_gzip;
	client.unique = unique;
	client.ims = ims;
	sleeping_clients[request_id++] = client;

	// Remember the client on its socket so the close handler below can
	// remove it from the wait list if the connection drops.
	request.socket.client = client;
});
// Clean up the wait list when a long-polling connection goes away.
server.on('connection', function(socket) {
	socket.on('close', function() {
		var client = socket.client;
		if (client) {
			mark_recently_seen(client.unique);
			delete sleeping_clients[client.request_id];
		}
	});
});

server.listen(port);