// git.sesse.net Git - remoteglot/blob - www/serve-analysis.js
// Make sure we do not try to process more than one analysis.json update at a time.
// [remoteglot] / www / serve-analysis.js
1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
3
4 // Modules.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var delta = require('./js/json_delta.js');
12
// Tunables.

// Path of the analysis file that is watched, read and handed out.
var JSON_FILENAME = '/srv/analysis.sesse.net/www/analysis.json';

// How many superseded versions are retained as bases for diffs.
var HISTORY_TO_KEEP = 5;

// Update lock. 0 means idle; 1 means a JSON update is currently being
// processed and no new one should be started; 2 means that, in addition,
// another update is waiting in the queue.
var json_lock = 0;

// The version of the file currently being handed out (including its
// last modified time). Stays undefined until the first read completes.
var json;

// Up to HISTORY_TO_KEEP earlier versions, oldest first, and the diffs
// from each of them to the latest version (keyed by the older version's
// last modified time).
var historic_json = [];
var diff_json = {};

// Clients whose requests are parked until new data shows up.
// The key is a unique request_id, which lets us pull a client out of
// the queue again if it closes its socket.
var sleeping_clients = {};
var request_id = 0;

// When each client was last seen, keyed by the client's unique ID.
// This is what the viewer count shown to the user is derived from.
var last_seen_clients = {};

// Timer that touches the file after 30 seconds if nobody else has
// done so. This keeps clients from hanging indefinitely (which might
// have them return errors).
var touch_timer;

// Behind Varnish we cannot count the clients ourselves, so an external
// log-tailing daemon has to tell us the number. Undefined means that
// no override is in effect.
var viewer_count_override;
46
// Installs a freshly read copy of the analysis file as the current version.
//
//   new_json_contents: the raw file contents (a JSON document, as a string).
//   mtime: the file's modification time; it doubles as the version
//          identifier stored in last_modified.
//
// The previous version is moved into historic_json (unless it is an invalid
// base), diffs from every historic version to the new one are built, the
// new version is gzipped, and only then are json and diff_json swapped,
// json_lock released and the sleeping clients woken up.
//
// Contents that do not parse as JSON (e.g. a file caught in the middle of
// being written) are logged and dropped: the lock is released and the
// currently served version, its history and its diffs are left untouched.
// gzip failures are still thrown.
var replace_json = function(new_json_contents, mtime) {
        // Parse before touching any shared state, so that a bad file cannot
        // leave the history half-updated or take the whole server down.
        var parsed;
        try {
                parsed = JSON.parse(new_json_contents);
        } catch (e) {
                console.log("Ignoring unparseable analysis JSON: " + e.message);
                json_lock = 0;
                return;
        }

        // Generate the list of diffs from the last five versions.
        if (json !== undefined) {
                // If two versions have the same mtime, clients could have either.
                // Note the fact, so that we never insert it.
                if (json.last_modified == mtime) {
                        json.invalid_base = true;
                }
                if (!json.invalid_base) {
                        historic_json.push(json);
                        if (historic_json.length > HISTORY_TO_KEEP) {
                                historic_json.shift();
                        }
                }
        }

        var new_json = {
                parsed: parsed,
                plain: new_json_contents,
                last_modified: mtime
        };
        // The diff builder consumes the array it is given, so hand it a copy.
        create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
                // gzip the new version (non-delta), and put it into place.
                zlib.gzip(new_json_contents, function(err, buffer) {
                        if (err) throw err;

                        new_json.gzip = buffer;
                        json = new_json;
                        diff_json = new_diff_json;
                        json_lock = 0;

                        // Finally, wake up any sleeping clients.
                        possibly_wakeup_clients();
                });
        });
}
83
// Builds, one historic version at a time, the table of diffs leading from
// each historic version to new_json.
//
//   new_json: the new version; its .parsed and .last_modified are used.
//   history_left: the historic versions still to be processed. This array
//                 is consumed (shifted) as the work proceeds.
//   new_diff_json: the table being filled in, keyed by the historic
//                  version's last_modified.
//   cb: called with new_diff_json once history_left is empty.
//
// Every entry holds the diff itself (plain), its JSON text (text), the
// gzipped text (gzip) and the last_modified of the version it leads to.
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
        if (history_left.length > 0) {
                var base = history_left.shift();
                var patch = delta.JSON_delta.diff(base.parsed, new_json.parsed);
                var patch_text = JSON.stringify(patch);
                zlib.gzip(patch_text, function(err, compressed) {
                        if (err) throw err;

                        var entry = {};
                        entry.plain = patch;
                        entry.text = patch_text;
                        entry.gzip = compressed;
                        entry.last_modified = new_json.last_modified;
                        new_diff_json[base.last_modified] = entry;

                        // Move on to the next historic version.
                        create_json_historic_diff(new_json, history_left, new_diff_json, cb);
                });
                return;
        }

        // Nothing left to diff against; hand the finished table back.
        cb(new_diff_json);
}
104
// fs.watch listener (also called once directly at startup): rereads
// JSON_FILENAME and hands its contents and mtime to replace_json.
//
//   event: the fs.watch event type; only passed along to queued retries.
//   filename: name of the directory entry that changed; everything except
//             the basename of JSON_FILENAME is ignored.
//
// Only one update is processed at a time, coordinated through json_lock:
//   0: idle; take the lock (-> 1) and start reading.
//   1: an update is in flight; mark one reread as queued (-> 2) and poll
//      every 100 ms until the in-flight update has finished.
//   2: a reread is already queued; drop the event.
// replace_json sets json_lock back to 0 when it is done.
//
// At most the first 1 MiB of the file is read. Errors from open/fstat/read
// are thrown. Each reread that actually starts also re-arms the 30-second
// timer that touches the file when nothing else does.
var reread_file = function(event, filename) {
        if (filename !== path.basename(JSON_FILENAME)) {
                return;
        }
        if (json_lock >= 2) {
                return;
        }
        if (json_lock === 1) {
                // Already processing; wait a bit.
                json_lock = 2;
                var retry = function() {
                        if (json_lock === 2) {
                                // The in-flight update has not finished yet
                                // (it resets the lock to 0 when it does).
                                setTimeout(retry, 100);
                                return;
                        }
                        // The lock is now 0 (idle) or 1 (a newer update got in
                        // first); either way the normal entry path handles it.
                        // Forcing the lock to 1 here would send us straight back
                        // into this branch and the reread would never happen.
                        reread_file(event, filename);
                };
                setTimeout(retry, 100);
                return;
        }
        json_lock = 1;

        var READ_BUFFER_SIZE = 1048576;

        console.log("Rereading " + JSON_FILENAME);
        fs.open(JSON_FILENAME, 'r+', function(err, fd) {
                if (err) throw err;
                fs.fstat(fd, function(err, st) {
                        if (err) throw err;
                        var buffer = Buffer.alloc(READ_BUFFER_SIZE);
                        fs.read(fd, buffer, 0, READ_BUFFER_SIZE, 0, function(err, bytesRead, buffer) {
                                if (err) throw err;
                                fs.close(fd, function() {
                                        var new_json_contents = buffer.toString('utf8', 0, bytesRead);
                                        replace_json(new_json_contents, st.mtime.getTime());
                                });
                        });
                });
        });

        if (touch_timer !== undefined) {
                clearTimeout(touch_timer);
        }
        touch_timer = setTimeout(function() {
                console.log("Touching analysis.json due to no other activity");
                var now = Date.now() / 1000;
                // fs.utimes requires a callback; a failed touch is logged
                // rather than allowed to take the server down.
                fs.utimes(JSON_FILENAME, now, now, function(err) {
                        if (err) {
                                console.log("Could not touch " + JSON_FILENAME + ": " + err.message);
                        }
                });
        }, 30000);
}
// Answers every parked request with the current data and empties the
// wait list. The viewer count is computed once, up front, and the same
// number is sent to all of the woken clients.
var possibly_wakeup_clients = function() {
        var viewers = count_viewers();
        for (var id in sleeping_clients) {
                var waiter = sleeping_clients[id];
                // A client we are just about to answer is clearly still around.
                mark_recently_seen(waiter.unique);
                send_json(waiter.response, waiter.ims, waiter.accept_gzip, viewers);
        }
        // Everybody has been served; start over with an empty wait list.
        sleeping_clients = {};
}
// Replies with a plain-text 404 and a generic apology, then ends the response.
//
//   response: the HTTP response object to answer on.
var send_404 = function(response) {
        var headers = {};
        headers['Content-Type'] = 'text/plain';

        response.writeHead(404, headers);
        response.write('Something went wrong. Sorry.');
        response.end();
}
// Handles a request to set the externally supplied viewer count.
//
//   request: the incoming HTTP request; its socket's remote address and
//            its x-forwarded-for header decide whether it is accepted.
//   u: the parsed request URL; the new value is taken from u.query.num.
//   response: the HTTP response object to answer on.
//
// Accepted requests store the "num" query value, exactly as received, in
// viewer_count_override and are answered with a plain-text 200 "OK.".
// Everything else is logged and answered through send_404.
var handle_viewer_override = function(request, u, response) {
        // Only accept requests from localhost. The peer is the remote end of
        // the socket; on a dual-stack listener an IPv4 loopback connection
        // shows up in its IPv4-mapped IPv6 form.
        var peer = request.socket.remoteAddress;
        var is_loopback = (peer === '127.0.0.1' ||
                           peer === '::1' ||
                           peer === '::ffff:127.0.0.1');

        // A forwarded request did not originate on this machine, no matter
        // which address the proxy itself connected from.
        if (!is_loopback || request.headers['x-forwarded-for']) {
                console.log("Refusing viewer override from " + peer);
                send_404(response);
                return;
        }

        viewer_count_override = (u.query)['num'];
        response.writeHead(200, {
                'Content-Type': 'text/plain',
        });
        response.write('OK.');
        response.end();
}
// Sends the current data to one client and ends the response.
//
//   response: the HTTP response object to answer on.
//   ims: the version (last modified time) the client says it already has,
//        straight from the query string; may be undefined.
//   accept_gzip: whether the client accepts gzip-encoded bodies.
//   num_viewers: the viewer count to report in X-Remoteglot-Num-Viewers.
//
// If a diff from the client's version to the latest one exists, the diff
// is sent; otherwise the full document is sent. Expects json to be set.
var send_json = function(response, ims, accept_gzip, num_viewers) {
        // ims is client-controlled, so only own keys of the diff table may
        // match; a plain lookup would also find inherited members such as
        // "constructor" and hand out something that is not a diff at all.
        var this_json = json;
        if (Object.prototype.hasOwnProperty.call(diff_json, ims)) {
                this_json = diff_json[ims];
        }

        var headers = {
                'Content-Type': 'text/json',
                'X-Remoteglot-Last-Modified': this_json.last_modified,
                'X-Remoteglot-Num-Viewers': num_viewers,
                'Access-Control-Expose-Headers': 'X-Remoteglot-Last-Modified, X-Remoteglot-Num-Viewers',
                'Expires': 'Mon, 01 Jan 1970 00:00:00 UTC',
                'Vary': 'Accept-Encoding',
        };

        var body;
        if (accept_gzip) {
                headers['Content-Encoding'] = 'gzip';
                body = this_json.gzip;
        } else if (this_json.text !== undefined) {
                // Diff entries carry their serialized form in .text
                // (their .plain is the diff object itself).
                body = this_json.text;
        } else {
                // The full document carries its serialized form in .plain.
                body = this_json.plain;
        }

        response.writeHead(200, headers);
        response.write(body);
        response.end();
}
// Records the current time as the moment the given client was last seen.
// Requests that did not supply a unique ID are not tracked.
//
//   unique: the client's self-chosen unique ID, or a falsy value.
var mark_recently_seen = function(unique) {
        if (!unique) {
                return;
        }
        last_seen_clients[unique] = Date.now();
}
// Returns the number of viewers to report to clients.
//
// If an override is set, it is returned as is. Otherwise the count is
// the number of distinct clients that were seen during the last five
// seconds or that currently have a request parked in sleeping_clients.
// As a side effect, clients not seen for five seconds or more are
// dropped from last_seen_clients.
var count_viewers = function() {
        if (viewer_count_override !== undefined) {
                return viewer_count_override;
        }

        var now = Date.now();
        var has_own = Object.prototype.hasOwnProperty;

        // Go through and remove old viewers, and count them at the same time.
        var still_seen = {};
        var num_viewers = 0;
        for (var seen_id in last_seen_clients) {
                if (now - last_seen_clients[seen_id] < 5000) {
                        ++num_viewers;
                        still_seen[seen_id] = last_seen_clients[seen_id];
                }
        }

        // Also add sleeping clients that we would otherwise assume timed out.
        // One client can have several requests parked, so remember who has
        // been counted already. Own-key checks keep client-chosen IDs such
        // as "toString" from matching inherited members.
        var counted_sleepers = {};
        for (var id in sleeping_clients) {
                var unique = sleeping_clients[id].unique;
                if (!unique) {
                        continue;
                }
                if (has_own.call(still_seen, unique) || has_own.call(counted_sleepers, unique)) {
                        continue;
                }
                counted_sleepers[unique] = true;
                ++num_viewers;
        }

        last_seen_clients = still_seen;
        return num_viewers;
}
233
// Watch the directory that holds the file, so that changes to it are
// picked up, and kick off one read right away so that we have a copy
// to hand out.
var watched_dir = path.dirname(JSON_FILENAME);
var watched_name = path.basename(JSON_FILENAME);
fs.watch(watched_dir, reread_file);
reread_file(null, watched_name);
238
// The HTTP front end. /override-num-viewers sets the external viewer
// count, /analysis.pl hands out the analysis (long-polling when the
// client is already up to date), and anything else gets a 404.
var server = http.createServer();

server.on('request', function(request, response) {
        var u = url.parse(request.url, true);
        var query = u.query;
        var ims = query['ims'];
        var unique = query['unique'];

        console.log(Date.now()*1e-3 + " " + request.url);

        if (u.pathname === '/override-num-viewers') {
                handle_viewer_override(request, u, response);
                return;
        }
        if (u.pathname !== '/analysis.pl') {
                // This is not the request you are looking for.
                send_404(response);
                return;
        }

        mark_recently_seen(unique);

        var accept_encoding = request.headers['accept-encoding'];
        var accept_gzip = accept_encoding !== undefined && /\bgzip\b/.test(accept_encoding);

        // Is there already something newer than what the client has?
        // Then there is no reason to make it wait.
        var have_news = json !== undefined && (!ims || json.last_modified > ims);
        if (have_news) {
                send_json(response, ims, accept_gzip, count_viewers());
                return;
        }

        // Nothing newer yet; park the request on the wait list until
        // the next update arrives.
        var waiter = {
                response: response,
                request_id: request_id,
                accept_gzip: accept_gzip,
                unique: unique,
                ims: ims
        };
        sleeping_clients[waiter.request_id] = waiter;
        request_id++;

        // Remember the parked request on the socket, so that the close
        // handler below can take it off the wait list.
        request.socket.client = waiter;
});

server.on('connection', function(socket) {
        socket.on('close', function() {
                var waiter = socket.client;
                if (!waiter) {
                        return;
                }
                mark_recently_seen(waiter.unique);
                delete sleeping_clients[waiter.request_id];
        });
});

server.listen(5000);