1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var readline = require('readline');
12 var child_process = require('child_process');
13 var delta = require('../www/js/json_delta.js');
14 var hash_lookup = require('./hash-lookup.js');
// ---------------------------------------------------------------------------
// Configuration, partly overridable from the command line:
//   argv[2] = JSON file to serve, argv[3] = serve URL, argv[4] = hash URL,
//   argv[5] = TCP port, argv[6] = comma-separated gRPC backends.
// NOTE(review): this excerpt is a numbered paste; the leading digits on each
// line are artifacts, and several original lines (closing braces, blanks,
// the `var port = ...` default) are missing.
// ---------------------------------------------------------------------------
// How many previous versions to keep deltas against for incremental updates.
17 var HISTORY_TO_KEEP = 5;
// If set, sent to clients as X-RGMV so outdated clients can be told to upgrade.
18 var MINIMUM_VERSION = null;
// If true, the viewer count comes from parsing varnishncsa output instead of
// counting connections ourselves (see the COUNT_FROM_VARNISH_LOG block below).
19 var COUNT_FROM_VARNISH_LOG = true;
// The JSON file to serve; the first positional argument overrides the default.
22 var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
23 if (process.argv.length >= 3) {
24 json_filename = process.argv[2];
27 // Expected destination filenames.
28 var serve_url = '/analysis.pl';
29 var hash_serve_url = '/hash';
30 if (process.argv.length >= 4) {
31 serve_url = process.argv[3];
33 if (process.argv.length >= 5) {
34 hash_serve_url = process.argv[4];
37 // TCP port to listen on.
// NOTE(review): the line declaring the port default is elided here — verify
// against the original file before relying on this excerpt.
39 if (process.argv.length >= 6) {
// NOTE(review): parseInt without an explicit radix 10; harmless for plain
// decimal port numbers but worth making explicit.
40 port = parseInt(process.argv[5]);
// Backends used for position hash lookups, overridable as a comma-separated
// host:port list.
44 var grpc_backends = ["localhost:50051", "localhost:50052"];
45 if (process.argv.length >= 7) {
46 grpc_backends = process.argv[6].split(",");
48 hash_lookup.init(grpc_backends);
// --- Module-level mutable state ---
50 // If set to 1, we are already processing a JSON update and should not
51 // start a new one. If set to 2, we are _also_ having one in the queue.
// NOTE(review): the declaration itself (presumably `var json_lock = 0;`) is
// on an elided line.
54 // The current contents of the file to hand out, and its last modified time.
// NOTE(review): the declaration of `json` is on an elided line.
57 // The last five timestamps, and diffs from them to the latest version.
58 var historic_json = [];
// NOTE(review): the declaration of `diff_json` (the map from a historic
// last_modified timestamp to the delta up to the current version) is elided.
61 // The list of clients that are waiting for new data to show up.
62 // Uniquely keyed by request_id so that we can take them out of
63 // the queue if they close the socket.
64 var sleeping_clients = {};
67 // List of when clients were last seen, keyed by their unique ID.
68 // Used to show a viewer count to the user.
69 var last_seen_clients = {};
71 // The timer used to touch the file every 30 seconds if nobody
72 // else does it for us. This makes sure we don't have clients
73 // hanging indefinitely (which might have them return errors).
74 var touch_timer = undefined;
76 // If we are behind Varnish, we can't count the number of clients
77 // ourselves, so we need to get it from parsing varnishncsa.
78 var viewer_count_override = undefined;
// Replace the currently served JSON with <new_json_contents> (last-modified
// <mtime>, in milliseconds), push the previous version onto the bounded
// history list, recompute deltas and the gzipped full version, and finally
// wake any long-polling clients.
80 var replace_json = function(new_json_contents, mtime) {
81 // Generate the list of diffs from the last five versions.
82 if (json !== undefined) {
83 // If two versions have the same mtime, clients could have either.
84 // Note the fact, so that we never insert it.
85 if (json.last_modified == mtime) {
86 json.invalid_base = true;
88 if (!json.invalid_base) {
89 historic_json.push(json);
90 if (historic_json.length > HISTORY_TO_KEEP) {
// Drop the oldest entry so the history stays bounded.
91 historic_json.shift();
// NOTE(review): the opening of the `new_json` object literal (and its
// last_modified field) is on elided lines.
97 parsed: JSON.parse(new_json_contents),
98 plain: new_json_contents,
// slice(0) hands the recursive differ a copy it may consume via shift().
101 create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
102 // gzip the new version (non-delta), and put it into place.
103 zlib.gzip(new_json_contents, function(err, buffer) {
// NOTE(review): error handling for `err` appears to be on an elided line.
106 new_json.gzip = buffer;
// Publish the new version and its delta map (single-threaded event loop,
// so this pair of assignments is effectively atomic for readers).
108 diff_json = new_diff_json;
111 // Finally, wake up any sleeping clients.
112 possibly_wakeup_clients();
// Recursively compute and gzip a JSON delta from each historic version in
// <history_left> to <new_json>, storing results in <new_diff_json> keyed by
// the historic version's last_modified timestamp. Calls <cb> with the
// finished map once the list is exhausted. Recursion (instead of a plain
// loop) is used because each step completes in an async gzip callback.
117 var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
118 if (history_left.length == 0) {
// NOTE(review): the base case body (presumably `cb(new_diff_json); return;`)
// is on elided lines.
123 var histobj = history_left.shift();
124 var diff = delta.JSON_delta.diff(histobj.parsed, new_json.parsed);
125 var diff_text = JSON.stringify(diff);
// Compress the delta asynchronously, then recurse on the remaining history.
126 zlib.gzip(diff_text, function(err, buffer) {
128 new_diff_json[histobj.last_modified] = {
// NOTE(review): the other fields of this object (plain/gzip payloads) are on
// elided lines.
132 last_modified: new_json.last_modified,
134 create_json_historic_diff(new_json, history_left, new_diff_json, cb);
// fs.watch callback: re-read json_filename when it changes. json_lock
// serializes updates: 1 = an update is in flight, 2 = one more is queued.
// Also (re)arms a timer that touches the file if nothing else updates it,
// so long-polling clients are eventually released.
138 var reread_file = function(event, filename) {
// The watcher covers the whole directory; ignore events for other files.
139 if (filename != path.basename(json_filename)) {
142 if (json_lock >= 2) {
// An update is already queued behind the in-flight one; drop this event.
145 if (json_lock == 1) {
146 // Already processing; wait a bit.
// NOTE(review): json_lock is presumably bumped to 2 on an elided line so
// only one retry gets queued; the retry resets it to 1 before re-entering.
148 setTimeout(function() { json_lock = 1; reread_file(event, filename); }, 100);
153 console.log("Rereading " + json_filename);
154 fs.open(json_filename, 'r', function(err, fd) {
// NOTE(review): error handling for open/fstat/read appears to be elided.
156 fs.fstat(fd, function(err, st) {
// NOTE(review): `new Buffer()` is deprecated (Buffer.alloc is the modern
// form), and the fixed 1 MiB buffer silently truncates larger files.
158 var buffer = new Buffer(1048576);
159 fs.read(fd, buffer, 0, 1048576, 0, function(err, bytesRead, buffer) {
161 fs.close(fd, function() {
162 var new_json_contents = buffer.toString('utf8', 0, bytesRead);
// Hand off to replace_json with the file's mtime in milliseconds.
163 replace_json(new_json_contents, st.mtime.getTime());
// Re-arm the idle-touch timer: if nothing modifies the file for a while
// (the comments above say 30 seconds; the duration itself is on an elided
// line), touch it ourselves so clients' long polls complete.
169 if (touch_timer !== undefined) {
170 clearTimeout(touch_timer);
172 touch_timer = setTimeout(function() {
173 console.log("Touching analysis.json due to no other activity");
// fs.utimes wants seconds, not milliseconds.
174 var now = Date.now() / 1000;
175 fs.utimes(json_filename, now, now);
// Wake up every long-polling client: mark each one as recently seen (for
// the viewer count) and send it the current JSON — as a delta if we still
// have one for the client's version — then clear the wait list.
// The viewer count is computed once and shared by all responses.
var possibly_wakeup_clients = function() {
	var num_viewers = count_viewers();
	for (var i in sleeping_clients) {
		mark_recently_seen(sleeping_clients[i].unique);
		send_json(sleeping_clients[i].response,
		          sleeping_clients[i].ims,
		          sleeping_clients[i].accept_gzip,
		          num_viewers);
	}
	sleeping_clients = {};
}
// Send a plain-text 404 ("Something went wrong. Sorry.") and finish the
// response. Without the final end() the client connection would hang.
var send_404 = function(response) {
	response.writeHead(404, {
		'Content-Type': 'text/plain',
	});
	response.write('Something went wrong. Sorry.');
	response.end();
}
// Send the current JSON state on <response>. If the client's version
// timestamp <ims> matches a version we still keep a delta for, send that
// delta instead of the full document. Metadata rides in X-RG* headers:
// X-RGLM = last modified of what we send, X-RGNV = current viewer count,
// X-RGMV = minimum supported client version (only when configured).
// Both plain and gzipped representations are precomputed by replace_json /
// create_json_historic_diff, so this only selects and writes.
var send_json = function(response, ims, accept_gzip, num_viewers) {
	// Prefer a delta from the client's version; fall back to the full JSON.
	var this_json = diff_json[ims] || json;

	var headers = {
		'Content-Type': 'text/json',
		'X-RGLM': this_json.last_modified,
		'X-RGNV': num_viewers,
		'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
		'Vary': 'Accept-Encoding',
	};

	if (MINIMUM_VERSION) {
		headers['X-RGMV'] = MINIMUM_VERSION;
	}

	if (accept_gzip) {
		headers['Content-Length'] = this_json.gzip.length;
		headers['Content-Encoding'] = 'gzip';
		response.writeHead(200, headers);
		response.write(this_json.gzip);
	} else {
		headers['Content-Length'] = this_json.plain.length;
		response.writeHead(200, headers);
		response.write(this_json.plain);
	}
	response.end();
}
// Record that client <unique> was seen just now (milliseconds since epoch),
// for the viewer count. Clients without a unique ID are ignored — without
// the guard, a missing ID would create a bogus "undefined" entry.
var mark_recently_seen = function(unique) {
	if (unique) {
		last_seen_clients[unique] = (new Date).getTime();
	}
}
// Count the number of current viewers. If the count is supplied externally
// (from varnishncsa parsing), use that override. Otherwise, count clients
// seen within the last five seconds — pruning stale entries from
// last_seen_clients as a side effect — plus any long-polling (sleeping)
// clients that would otherwise look timed out.
var count_viewers = function() {
	if (viewer_count_override !== undefined) {
		return viewer_count_override;
	}

	var now = (new Date).getTime();

	// Go through and remove old viewers, and count them at the same time.
	var new_last_seen_clients = {};
	var num_viewers = 0;
	for (var unique in last_seen_clients) {
		if (now - last_seen_clients[unique] < 5000) {
			++num_viewers;
			new_last_seen_clients[unique] = last_seen_clients[unique];
		}
	}

	// Also add sleeping clients that we would otherwise assume timed out.
	for (var request_id in sleeping_clients) {
		var unique = sleeping_clients[request_id].unique;
		if (unique && !(unique in new_last_seen_clients)) {
			++num_viewers;
		}
	}

	last_seen_clients = new_last_seen_clients;
	return num_viewers;
}
// Log <str> to stdout, prefixed with the current Unix time in seconds
// (millisecond resolution), e.g. "[1234567890.123] message".
var log = function(str) {
	console.log("[" + ((new Date).getTime()*1e-3).toFixed(3) + "] " + str);
}
260 // Set up a watcher to catch changes to the file, then do an initial read
261 // to make sure we have a copy.
// We watch the containing directory rather than the file itself, so the
// watch survives atomic rename-into-place updates; reread_file filters on
// the basename.
262 fs.watch(path.dirname(json_filename), reread_file);
263 reread_file(null, path.basename(json_filename));
// Viewer counting via Varnish: spawn varnishncsa filtered to requests for
// serve_url, parse each logged request for its timestamp and unique client
// ID, and periodically distill that into viewer_count_override.
265 if (COUNT_FROM_VARNISH_LOG) {
266 // Note: We abuse serve_url as a regex.
267 var varnishncsa = child_process.spawn(
268 'varnishncsa', ['-F', '%{%s}t %U %q tffb=%{Varnish:time_firstbyte}x',
269 '-q', 'ReqURL ~ "^' + serve_url + '"']);
// NOTE(review): the per-client map (presumably `var uniques = {}` holding
// {last_seen, grace} records) is declared on an elided line.
270 var rl = readline.createInterface({
271 input: varnishncsa.stdout,
272 output: varnishncsa.stdin,
// Parse one access-log line:
// "<unix_ts> <url> ?ims=...&unique=<id> tffb=<seconds>".
277 rl.on('line', function(line) {
278 var v = line.match(/(\d+) .*\?ims=\d+&unique=(.*) tffb=(.*)/);
// last_seen = request start plus time-to-first-byte, in milliseconds.
281 last_seen: (parseInt(v[1]) + parseFloat(v[3])) * 1e3,
284 log(v[1] + " " + v[2] + " " + v[3]);
286 log("VARNISHNCSA UNPARSEABLE LINE: " + line);
// Periodically (interval on an elided line) time out stale uniques and
// publish the count. A client is kept if seen within five seconds, if it
// already has the latest JSON (it is just hanging on the long poll), or if
// it is within a five-second grace period after a new JSON appeared.
289 setInterval(function() {
290 var mtime = json.last_modified - 1000; // Compensate for subsecond issues.
291 var now = (new Date).getTime();
294 for (var unique in uniques) {
296 var last_seen = uniques[unique].last_seen;
297 if (now - last_seen <= 5000) {
298 // We've seen this user in the last five seconds;
302 if (last_seen >= mtime) {
303 // This user has the latest version;
304 // they are probably just hanging.
307 if (uniques[unique].grace === null) {
308 // They have five seconds after a new JSON has been
309 // provided to get it, or they're out.
310 // We don't simply use mtime, since we don't want to
311 // reset the grace timer just because a new JSON is
313 uniques[unique].grace = mtime;
315 if (now - uniques[unique].grace > 5000) {
316 log("Timing out " + unique + " (last_seen=" + last_seen + ", now=" + now +
// FIXME: the comma after `+ mtime` below makes the ", grace=..." part a
// second argument to log(), which ignores it — the message silently loses
// the grace value. It should be `+ ", grace=" + ...`.
317 ", mtime=" + mtime, ", grace=" + uniques[unique].grace + ")");
318 delete uniques[unique];
// NOTE(review): the counting of surviving entries into num_viewers is on
// elided lines.
323 log(num_viewers + " entries in hash, mtime=" + mtime);
324 viewer_count_override = num_viewers;
328 var server = http.createServer();
// Main request handler: hash lookups are delegated to hash_lookup;
// everything else must match serve_url and is served the analysis JSON —
// either immediately (the client is out of date) or by parking the client
// on the wait list until new data arrives (long polling).
329 server.on('request', function(request, response) {
330 var u = url.parse(request.url, true);
// ims = the client's current version timestamp; unique = its viewer ID.
331 var ims = (u.query)['ims'];
332 var unique = (u.query)['unique'];
335 if (u.pathname === hash_serve_url) {
336 var fen = (u.query)['fen'];
337 hash_lookup.handle_request(fen, response);
// NOTE(review): the `return;` after delegating is on an elided line.
340 if (u.pathname !== serve_url) {
341 // This is not the request you are looking for.
// NOTE(review): the 404 response and `return;` here are elided.
346 mark_recently_seen(unique);
// Determine whether the client accepts our precomputed gzipped payload.
348 var accept_encoding = request.headers['accept-encoding'];
350 if (accept_encoding !== undefined && accept_encoding.match(/\bgzip\b/)) {
// NOTE(review): the declaration and setting of `accept_gzip` are on elided
// lines.
356 // If we already have something newer than what the user has,
357 // just send it out and be done with it.
358 if (json !== undefined && (!ims || json.last_modified > ims)) {
359 send_json(response, ims, accept_gzip, count_viewers());
// NOTE(review): a `return;` is elided here.
363 // OK, so we need to hang until we have something newer.
364 // Put the user on the wait list.
// NOTE(review): `var client = {};` (and presumably `client.ims = ims;`)
// are on elided lines.
366 client.response = response;
367 client.request_id = request_id;
368 client.accept_gzip = accept_gzip;
369 client.unique = unique;
// request_id is a module-level counter (declaration elided in this excerpt).
371 sleeping_clients[request_id++] = client;
// Remember the client on the socket so the close handler can clean it up.
373 request.socket.client = client;
// When a socket closes, remove its (possibly sleeping) client from the wait
// list so we never try to write to a dead connection, but still count it as
// recently seen for the viewer count.
375 server.on('connection', function(socket) {
376 socket.on('close', function() {
377 var client = socket.client;
// NOTE(review): a guard (`if (client) {`) is presumably on an elided line —
// not every socket will have had a client attached.
379 mark_recently_seen(client.unique);
380 delete sleeping_clients[client.request_id];