Make serve-analysis.js capable of listening on a different port.
[remoteglot] / www / serve-analysis.js
1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
3
4 // Modules.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var delta = require('./js/json_delta.js');
12
// Configuration.

// Absolute path of the JSON file that is watched, read and handed out.
var JSON_FILENAME = '/srv/analysis.sesse.net/www/analysis.json';
// Upper bound on the number of superseded versions kept for diffing.
var HISTORY_TO_KEEP = 5;
// When set to a truthy value, it is passed to clients in the X-RGMV header.
var MINIMUM_VERSION = null;

// Default TCP port to listen on; a command-line flag may replace it.
var port = 5000;

// Update lock. 0 means idle, 1 means a JSON update is being processed
// (so no new one may start), and 2 means that one more update is
// additionally waiting in the queue.
var json_lock = 0;

// The version of the file currently being handed out, together with its
// last modified time. Stays undefined until the first read has finished.
var json;

// The most recent superseded versions (at most HISTORY_TO_KEEP of them),
// and the diffs leading from each of them to the latest version.
var historic_json = [];
var diff_json = {};

// Clients that are parked until new data shows up. Each entry is keyed
// by a unique request_id, so that a client can be removed from the queue
// if it closes its socket before we answer.
var sleeping_clients = {};
var request_id = 0;

// When each client was last seen, keyed by the client's unique ID.
// This is what the viewer count shown to the user is derived from.
var last_seen_clients = {};

// Timer that touches the file after 30 seconds if nobody else has
// modified it in the meantime. This keeps clients from hanging
// indefinitely (which might have them return errors).
var touch_timer;

// Behind Varnish we cannot count the clients ourselves, so an external
// log-tailing daemon has to tell us the number. Undefined means that we
// do our own counting.
var viewer_count_override;
50
// Installs freshly read file contents as the version to hand out.
//
// new_json_contents is the raw JSON text and mtime is the file's
// modification time in milliseconds. The previous version (if any) is
// moved into the history, diffs from every historic version to the new
// one are rebuilt, the new text is gzipped, and finally json_lock is
// released and all sleeping clients are woken up.
//
// If the text is not valid JSON (for instance because the file was read
// while it was being written), the update is dropped: the current version
// and the history are left untouched and json_lock is released so that
// the next change to the file can be processed.
var replace_json = function(new_json_contents, mtime) {
        // Parse before touching any state, so that a bad file cannot leave
        // the history half-updated or take the whole process down.
        var parsed;
        try {
                parsed = JSON.parse(new_json_contents);
        } catch (err) {
                console.log("Ignoring unparseable JSON update: " + err.message);
                json_lock = 0;
                return;
        }

        // Generate the list of diffs from the last five versions.
        if (json !== undefined) {
                // If two versions have the same mtime, clients could have either.
                // Note the fact, so that we never insert it.
                if (json.last_modified == mtime) {
                        json.invalid_base = true;
                }
                if (!json.invalid_base) {
                        historic_json.push(json);
                        if (historic_json.length > HISTORY_TO_KEEP) {
                                historic_json.shift();
                        }
                }
        }

        var new_json = {
                parsed: parsed,
                plain: new_json_contents,
                last_modified: mtime
        };
        // The history is passed as a copy, since the diff builder consumes it.
        create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
                // gzip the new version (non-delta), and put it into place.
                zlib.gzip(new_json_contents, function(err, buffer) {
                        if (err) throw err;

                        new_json.gzip = buffer;
                        json = new_json;
                        diff_json = new_diff_json;
                        json_lock = 0;

                        // Finally, wake up any sleeping clients.
                        possibly_wakeup_clients();
                });
        });
};
87
// Builds, one historic version at a time, the diffs that lead from each
// entry of history_left to new_json.
//
// Every diff is stored in new_diff_json under the historic version's
// last_modified value, in parsed, plain-text and gzipped form, and tagged
// with the last_modified value of new_json. history_left is consumed in
// the process. Once it is empty, cb is called with new_diff_json.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
        if (history_left.length != 0) {
                var base = history_left.shift();
                var patch = delta.JSON_delta.diff(base.parsed, new_json.parsed);
                var patch_text = JSON.stringify(patch);

                var store_and_continue = function(err, compressed) {
                        if (err) throw err;
                        new_diff_json[base.last_modified] = {
                                parsed: patch,
                                plain: patch_text,
                                gzip: compressed,
                                last_modified: new_json.last_modified,
                        };
                        // Move on to the next historic version.
                        create_json_historic_diff(new_json, history_left, new_diff_json, cb);
                };

                zlib.gzip(patch_text, store_and_continue);
                return;
        }

        // Nothing left to diff against; hand the result back.
        cb(new_diff_json);
};
108
// fs.watch listener (also called once directly at startup) that rereads
// JSON_FILENAME and hands the contents to replace_json.
//
// event is passed through untouched; filename is the name reported by the
// watcher, and anything but the basename of JSON_FILENAME is ignored.
//
// Only one update is processed at a time. If one is in progress
// (json_lock == 1), a single retry is queued (json_lock == 2) and polled
// every 100 ms; further events arriving while a retry is queued are
// dropped, since the queued retry will pick up the newest contents anyway.
//
// Each actual reread also re-arms touch_timer, which touches the file
// after 30 seconds without activity.
var reread_file = function(event, filename) {
        if (filename != path.basename(JSON_FILENAME)) {
                return;
        }
        if (json_lock >= 2) {
                return;
        }
        if (json_lock == 1) {
                // Already processing; wait a bit.
                json_lock = 2;
                setTimeout(function() {
                        // Step back from "queued" to "processing" only if the
                        // update is still running. If it has finished in the
                        // meantime, the lock is 0 and must stay that way;
                        // forcing it to 1 here would make the call below
                        // re-queue itself forever and never read the file.
                        if (json_lock == 2) {
                                json_lock = 1;
                        }
                        reread_file(event, filename);
                }, 100);
                return;
        }
        json_lock = 1;

        console.log("Rereading " + JSON_FILENAME);
        fs.open(JSON_FILENAME, 'r+', function(err, fd) {
                if (err) throw err;
                fs.fstat(fd, function(err, st) {
                        if (err) throw err;
                        // NOTE: at most 1 MiB is read; a larger file would be
                        // truncated here.
                        var buffer = Buffer.alloc(1048576);
                        fs.read(fd, buffer, 0, 1048576, 0, function(err, bytesRead, buffer) {
                                if (err) throw err;
                                fs.close(fd, function() {
                                        var new_json_contents = buffer.toString('utf8', 0, bytesRead);
                                        replace_json(new_json_contents, st.mtime.getTime());
                                });
                        });
                });
        });

        if (touch_timer !== undefined) {
                clearTimeout(touch_timer);
        }
        touch_timer = setTimeout(function() {
                console.log("Touching analysis.json due to no other activity");
                var now = Date.now() / 1000;
                // fs.utimes requires a callback; a failed touch is only logged,
                // since it is a best-effort keepalive.
                fs.utimes(JSON_FILENAME, now, now, function(err) {
                        if (err) {
                                console.log("Could not touch " + JSON_FILENAME + ": " + err.message);
                        }
                });
        }, 30000);
};
// Answers every parked client and then empties the wait list.
//
// The viewer count is taken once, before any client is marked as seen,
// and the same number is sent to all of them.
var possibly_wakeup_clients = function() {
        var viewers = count_viewers();
        var parked = sleeping_clients;
        for (var id in parked) {
                var sleeper = parked[id];
                mark_recently_seen(sleeper.unique);
                send_json(sleeper.response, sleeper.ims, sleeper.accept_gzip, viewers);
        }
        sleeping_clients = {};
};
// Finishes the given response with a 404 status and a short plain-text
// apology as the body.
var send_404 = function(response) {
        var headers = { 'Content-Type': 'text/plain' };
        response.writeHead(404, headers);
        response.write('Something went wrong. Sorry.');
        response.end();
};
// Handles a request to set the externally supplied viewer count.
//
// request is the incoming HTTP request, u its parsed URL (the new count is
// taken from the "num" query parameter as-is) and response the response
// to finish. The override is only accepted if the connection arrived on
// a loopback address and carries no X-Forwarded-For header; everything
// else is logged and answered with a 404.
var handle_viewer_override = function(request, u, response) {
        // Only accept requests from localhost. Note that this is the
        // address the connection was received on, not the sender's address.
        var peer = request.socket.localAddress;
        // On a dual-stack (IPv6) listener, IPv4 loopback connections are
        // reported in IPv4-mapped form, so that spelling is accepted too.
        var is_loopback = (peer == '127.0.0.1' ||
                           peer == '::1' ||
                           peer == '::ffff:127.0.0.1');
        if (!is_loopback || request.headers['x-forwarded-for']) {
                console.log("Refusing viewer override from " + peer);
                send_404(response);
        } else {
                viewer_count_override = (u.query)['num'];
                response.writeHead(200, {
                        'Content-Type': 'text/plain',
                });
                response.write('OK.');
                response.end();
        }
};
// Sends the current data to a client and finishes the response.
//
// response is the HTTP response to write to. ims is the version the client
// says it already has; if a diff from exactly that version exists, the
// diff is sent, otherwise the full current version. accept_gzip selects
// the pre-compressed body. num_viewers goes out in the X-RGNV header, the
// version being sent in X-RGLM, and MINIMUM_VERSION (when set) in X-RGMV.
var send_json = function(response, ims, accept_gzip, num_viewers) {
        // ims comes straight from the query string, so only own keys of
        // diff_json may match; a value like "constructor" or "__proto__"
        // must not resolve to something inherited from Object.prototype.
        var has_diff = Object.prototype.hasOwnProperty.call(diff_json, ims);
        var this_json = (has_diff && diff_json[ims]) || json;

        var headers = {
                'Content-Type': 'text/json',
                'X-RGLM': this_json.last_modified,
                'X-RGNV': num_viewers,
                'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
                'Vary': 'Accept-Encoding',
        };

        if (MINIMUM_VERSION) {
                headers['X-RGMV'] = MINIMUM_VERSION;
        }

        if (accept_gzip) {
                headers['Content-Length'] = this_json.gzip.length;
                headers['Content-Encoding'] = 'gzip';
                response.writeHead(200, headers);
                response.write(this_json.gzip);
        } else {
                // Content-Length counts bytes, not UTF-16 code units, so the
                // string's length is wrong as soon as the JSON is not pure ASCII.
                headers['Content-Length'] = Buffer.byteLength(this_json.plain, 'utf8');
                response.writeHead(200, headers);
                response.write(this_json.plain);
        }
        response.end();
};
// Records the current time as the moment the client with the given unique
// ID was last seen. Requests without a (truthy) ID are not tracked.
var mark_recently_seen = function(unique) {
        if (!unique) {
                return;
        }
        last_seen_clients[unique] = Date.now();
};
// Returns the number of viewers to report to clients.
//
// An externally supplied override wins and is returned unchanged.
// Otherwise a client counts if it was seen less than five seconds ago;
// older entries are dropped from last_seen_clients as a side effect.
// Parked requests whose client ID is not among the recent ones are
// counted as well, once per parked request.
var count_viewers = function() {
        if (viewer_count_override !== undefined) {
                return viewer_count_override;
        }

        var now = Date.now();
        var still_fresh = {};
        var total = 0;

        // Keep (and count) only those clients that were seen recently.
        for (var id in last_seen_clients) {
                var seen_at = last_seen_clients[id];
                if (!(now - seen_at < 5000)) {
                        continue;
                }
                still_fresh[id] = seen_at;
                total += 1;
        }

        // A parked client is still watching even if we have not heard
        // from it for a while, so count it unless it was counted above.
        for (var slot in sleeping_clients) {
                var parked_id = sleeping_clients[slot].unique;
                if (!parked_id) {
                        continue;
                }
                if (parked_id in still_fresh) {
                        continue;
                }
                total += 1;
        }

        last_seen_clients = still_fresh;
        return total;
};
242
// Watch the directory that holds the JSON file, so that every change to
// it triggers a reread, and read the file once right away so that there
// is a copy to hand out from the start.
var watched_directory = path.dirname(JSON_FILENAME);
var watched_basename = path.basename(JSON_FILENAME);
fs.watch(watched_directory, reread_file);
reread_file(null, watched_basename);
247
// The HTTP server. Every request is logged with a timestamp and then
// handled as one of three things: a viewer-count override, the data
// endpoint (/analysis.pl), or a 404.
var server = http.createServer();
server.on('request', function(request, response) {
        var parsed_url = url.parse(request.url, true);
        var query = parsed_url.query;
        var ims = query['ims'];
        var unique = query['unique'];

        var timestamp = (Date.now() * 1e-3).toFixed(3);
        console.log(timestamp + " " + request.url);

        if (parsed_url.pathname === '/override-num-viewers') {
                handle_viewer_override(request, parsed_url, response);
                return;
        }
        if (parsed_url.pathname !== '/analysis.pl') {
                // This is not the request you are looking for.
                send_404(response);
                return;
        }

        mark_recently_seen(unique);

        // Does the client advertise gzip support?
        var accept_encoding = request.headers['accept-encoding'];
        var accept_gzip = Boolean(accept_encoding !== undefined &&
                                  accept_encoding.match(/\bgzip\b/));

        // A client without a version of its own, or with one older than
        // ours, can be answered straight away.
        var have_news = json !== undefined && (!ims || json.last_modified > ims);
        if (have_news) {
                send_json(response, ims, accept_gzip, count_viewers());
                return;
        }

        // Nothing newer yet, so the request has to hang. Park the client
        // on the wait list under a fresh request ID.
        var id = request_id++;
        var client = {
                response: response,
                request_id: id,
                accept_gzip: accept_gzip,
                unique: unique,
                ims: ims
        };
        sleeping_clients[id] = client;

        // Remember the parked client on its socket, so that it can be
        // found again from the socket alone.
        request.socket.client = client;
});
// When a socket closes, take the client that is parked on it (if any)
// off the wait list, and note that the client was seen just now.
server.on('connection', function(socket) {
        socket.on('close', function() {
                var parked = socket.client;
                if (!parked) {
                        return;
                }
                mark_recently_seen(parked.unique);
                delete sleeping_clients[parked.request_id];
        });
});
303
// An optional first command-line argument replaces the default port.
// It is parsed as a decimal number, and anything that is not a valid TCP
// port is rejected with a clear message instead of being handed to
// listen().
if (process.argv.length >= 3) {
        port = parseInt(process.argv[2], 10);
        if (isNaN(port) || port < 0 || port > 65535) {
                console.error("Invalid port: " + process.argv[2]);
                process.exit(1);
        }
}
server.listen(port);