1 // node.js version of analysis.pl; hopefully scales a bit better
2 // for this specific kind of task.
5 var http = require('http');
6 var fs = require('fs');
7 var url = require('url');
8 var querystring = require('querystring');
9 var path = require('path');
10 var zlib = require('zlib');
11 var delta = require('./js/json_delta.js');
// How many historical versions to keep gzipped deltas against; older
// versions are dropped from the delta cache (see replace_json).
var HISTORY_TO_KEEP = 5;

// If non-null, sent to clients in the X-RGMV header (see send_json);
// presumably a minimum client version — verify against the client code.
var MINIMUM_VERSION = null;

// The JSON file we serve; can be overridden by the first command-line
// argument.
var json_filename = '/srv/analysis.sesse.net/www/analysis.json';
if (process.argv.length >= 3) {
	json_filename = process.argv[2];

// TCP port to listen on.
if (process.argv.length >= 4) {
	// NOTE(review): parseInt() is called without an explicit radix of 10.
	port = parseInt(process.argv[3]);
// If set to 1, we are already processing a JSON update and should not
// start a new one. If set to 2, we are _also_ having one in the queue.
// NOTE(review): the json_lock declaration itself is not visible in this
// chunk; it is read and written by reread_file below.

// The current contents of the file to hand out, and its last modified time.
// NOTE(review): the `json` / `diff_json` declarations are not visible in
// this chunk; both are used by replace_json and send_json below.

// The last five timestamps, and diffs from them to the latest version.
// Each entry mirrors the current `json` object (parsed/plain/gzip forms
// plus its last_modified timestamp).
var historic_json = [];

// The list of clients that are waiting for new data to show up.
// Uniquely keyed by request_id so that we can take them out of
// the queue if they close the socket.
var sleeping_clients = {};

// List of when clients were last seen, keyed by their unique ID.
// Used to show a viewer count to the user.
var last_seen_clients = {};

// The timer used to touch the file every 30 seconds if nobody
// else does it for us. This makes sure we don't have clients
// hanging indefinitely (which might have them return errors).
var touch_timer = undefined;

// If we are behind Varnish, we can't count the number of clients
// ourselves, so some external log-tailing daemon needs to tell us.
var viewer_count_override = undefined;
// Install new_json_contents (the raw bytes read from json_filename) as
// the current version: push the previous version onto the history,
// compute gzipped deltas from every historic version, gzip the full new
// document, and finally wake up all long-polling clients.
// mtime is the file's modification time in milliseconds.
var replace_json = function(new_json_contents, mtime) {
	// Generate the list of diffs from the last five versions.
	if (json !== undefined) {
		// If two versions have the same mtime, clients could have either.
		// Note the fact, so that we never insert it.
		if (json.last_modified == mtime) {
			json.invalid_base = true;
		if (!json.invalid_base) {
			historic_json.push(json);
			// Cap the history; the oldest entry falls off the front.
			if (historic_json.length > HISTORY_TO_KEEP) {
				historic_json.shift();

		// (Fields of the new current-version object.)
		parsed: JSON.parse(new_json_contents),
		plain: new_json_contents,

	// slice(0) hands the diff routine its own copy of the history list,
	// since create_json_historic_diff consumes it with shift().
	create_json_historic_diff(new_json, historic_json.slice(0), {}, function(new_diff_json) {
		// gzip the new version (non-delta), and put it into place.
		zlib.gzip(new_json_contents, function(err, buffer) {
			new_json.gzip = buffer;

			// Publish the freshly computed delta table.
			diff_json = new_diff_json;

			// Finally, wake up any sleeping clients.
			possibly_wakeup_clients();
// Asynchronously compute a gzipped JSON delta from each entry of
// history_left to new_json, accumulating the results in new_diff_json
// keyed by the historic version's last_modified timestamp. The function
// recurses (rather than loops) because zlib.gzip is asynchronous;
// cb(new_diff_json) is invoked once the history list is exhausted.
// NOTE(review): history_left is consumed destructively via shift(), so
// callers must pass a private copy (replace_json passes slice(0)).
var create_json_historic_diff = function(new_json, history_left, new_diff_json, cb) {
	// Base case: no more history to diff against.
	if (history_left.length == 0) {

	var histobj = history_left.shift();
	// Structural diff from the old parsed document to the new one.
	var diff = delta.JSON_delta.diff(histobj.parsed, new_json.parsed);
	var diff_text = JSON.stringify(diff);
	zlib.gzip(diff_text, function(err, buffer) {
		// Record the delta under the old version's timestamp.
		new_diff_json[histobj.last_modified] = {

			last_modified: new_json.last_modified,

		// Tail-recurse onto the next historic version.
		create_json_historic_diff(new_json, history_left, new_diff_json, cb);
// fs.watch callback: re-read json_filename from disk and install the new
// contents via replace_json. json_lock serializes updates: 1 means we
// are currently processing, 2 means another update is already queued
// behind the current one.
var reread_file = function(event, filename) {
	// The watcher fires for every file in the watched directory;
	// ignore events for anything but our file.
	if (filename != path.basename(json_filename)) {

	// Already have an update queued; nothing more to remember.
	if (json_lock >= 2) {

	if (json_lock == 1) {
		// Already processing; wait a bit.
		setTimeout(function() { json_lock = 1; reread_file(event, filename); }, 100);

	console.log("Rereading " + json_filename);
	fs.open(json_filename, 'r+', function(err, fd) {

		fs.fstat(fd, function(err, st) {

			// NOTE(review): new Buffer() is deprecated in modern Node;
			// Buffer.alloc(1048576) is the safe replacement. This also
			// caps the readable file size at 1 MB.
			var buffer = new Buffer(1048576);
			fs.read(fd, buffer, 0, 1048576, 0, function(err, bytesRead, buffer) {

				fs.close(fd, function() {
					var new_json_contents = buffer.toString('utf8', 0, bytesRead);
					// Use the file's mtime (ms) as the version timestamp.
					replace_json(new_json_contents, st.mtime.getTime());

	// Re-arm the keepalive timer: if nothing else updates the file,
	// touch it ourselves so long-polling clients never hang forever.
	if (touch_timer !== undefined) {
		clearTimeout(touch_timer);

	touch_timer = setTimeout(function() {
		console.log("Touching analysis.json due to no other activity");
		// utimes() takes seconds, not milliseconds.
		var now = Date.now() / 1000;
		fs.utimes(json_filename, now, now);
// Send the current JSON to every long-polling client on the wait list,
// mark them all as recently seen, and clear the list.
var possibly_wakeup_clients = function() {
	// Compute the viewer count once; it is the same for everybody.
	var num_viewers = count_viewers();
	for (var i in sleeping_clients) {
		mark_recently_seen(sleeping_clients[i].unique);
		send_json(sleeping_clients[i].response,
			sleeping_clients[i].ims,
			sleeping_clients[i].accept_gzip,

	sleeping_clients = {};
// Reply to a request with a plain-text 404.
var send_404 = function(response) {
	response.writeHead(404, {
		'Content-Type': 'text/plain',

	response.write('Something went wrong. Sorry.');
// Handle /override-num-viewers: lets a local log-tailing daemon set the
// viewer count explicitly, for when we sit behind Varnish and cannot
// count clients ourselves (see viewer_count_override above).
var handle_viewer_override = function(request, u, response) {
	// Only accept requests from localhost.
	// NOTE(review): localAddress is the *server-side* address of the
	// socket; identifying the client normally wants remoteAddress —
	// verify this is intentional.
	var peer = request.socket.localAddress;
	// A present X-Forwarded-For header means the request was proxied,
	// so it did not truly originate locally; refuse those as well.
	if ((peer != '127.0.0.1' && peer != '::1') || request.headers['x-forwarded-for']) {
		console.log("Refusing viewer override from " + peer);

	// Take the count verbatim from the ?num= query parameter.
	viewer_count_override = (u.query)['num'];
	response.writeHead(200, {
		'Content-Type': 'text/plain',

	response.write('OK.');
// Send the client either a precomputed delta against the version it
// already has (ims = the timestamp the client reported via ?ims=) or,
// failing that, the full current document; gzipped iff accept_gzip.
// num_viewers is reported out-of-band in the X-RGNV header.
var send_json = function(response, ims, accept_gzip, num_viewers) {
	// Prefer the delta keyed by the client's version; fall back to the
	// full document when no delta exists for that timestamp.
	var this_json = diff_json[ims] || json;

		'Content-Type': 'text/json',
		// Out-of-band metadata: the version timestamp and viewer count.
		'X-RGLM': this_json.last_modified,
		'X-RGNV': num_viewers,
		// Let cross-origin JS read our custom headers.
		'Access-Control-Expose-Headers': 'X-RGLM, X-RGNV, X-RGMV',
		// Response body differs by Accept-Encoding; keep caches honest.
		'Vary': 'Accept-Encoding',

	if (MINIMUM_VERSION) {
		headers['X-RGMV'] = MINIMUM_VERSION;

		// Client accepts gzip: send the precompressed copy.
		headers['Content-Length'] = this_json.gzip.length;
		headers['Content-Encoding'] = 'gzip';
		response.writeHead(200, headers);
		response.write(this_json.gzip);

		// Plain (uncompressed) variant.
		headers['Content-Length'] = this_json.plain.length;
		response.writeHead(200, headers);
		response.write(this_json.plain);
// Record that the client with this unique ID was active just now;
// count_viewers() uses these timestamps to estimate the viewer count.
var mark_recently_seen = function(unique) {

	last_seen_clients[unique] = (new Date).getTime();
// Estimate the current number of viewers: distinct unique IDs seen
// within the last five seconds, pruning stale entries from
// last_seen_clients as a side effect. Clients currently parked on the
// long-poll wait list are counted as alive regardless of timestamp.
// An external override (see handle_viewer_override) takes precedence.
var count_viewers = function() {
	if (viewer_count_override !== undefined) {
		return viewer_count_override;

	var now = (new Date).getTime();

	// Go through and remove old viewers, and count them at the same time.
	var new_last_seen_clients = {};

	for (var unique in last_seen_clients) {
		// Five-second liveness window.
		if (now - last_seen_clients[unique] < 5000) {

			new_last_seen_clients[unique] = last_seen_clients[unique];

	// Also add sleeping clients that we would otherwise assume timed out.
	for (var request_id in sleeping_clients) {
		var unique = sleeping_clients[request_id].unique;
		if (unique && !(unique in new_last_seen_clients)) {

	// Publish the pruned map.
	last_seen_clients = new_last_seen_clients;
// Set up a watcher to catch changes to the file, then do an initial read
// to make sure we have a copy. (We watch the containing directory, not
// the file itself — presumably so rename-into-place updates are caught;
// reread_file filters events down to our filename.)
fs.watch(path.dirname(json_filename), reread_file);
reread_file(null, path.basename(json_filename));
var server = http.createServer();

// Main request dispatcher: /override-num-viewers for the local
// viewer-count daemon, /analysis.pl for polling clients; anything else
// is rejected. A client that is already up to date is parked on
// sleeping_clients until replace_json wakes it with fresh data.
server.on('request', function(request, response) {
	var u = url.parse(request.url, true);
	var ims = (u.query)['ims'];        // timestamp of the version the client already has
	var unique = (u.query)['unique'];  // client's unique ID, for viewer counting

	// Access log: epoch seconds with millisecond precision.
	console.log(((new Date).getTime()*1e-3).toFixed(3) + " " + request.url);
	if (u.pathname === '/override-num-viewers') {
		handle_viewer_override(request, u, response);

	if (u.pathname !== '/analysis.pl') {
		// This is not the request you are looking for.

	mark_recently_seen(unique);

	var accept_encoding = request.headers['accept-encoding'];

	// Word-boundary match so e.g. "x-gzip"-only clients are not confused
	// with plain "gzip" support.
	if (accept_encoding !== undefined && accept_encoding.match(/\bgzip\b/)) {

	// If we already have something newer than what the user has,
	// just send it out and be done with it.
	if (json !== undefined && (!ims || json.last_modified > ims)) {
		send_json(response, ims, accept_gzip, count_viewers());

	// OK, so we need to hang until we have something newer.
	// Put the user on the wait list.
	client.response = response;
	client.request_id = request_id;
	client.accept_gzip = accept_gzip;
	client.unique = unique;

	sleeping_clients[request_id++] = client;

	// Remember the client on the socket, so the close handler below can
	// drop it from the wait list if the connection goes away.
	request.socket.client = client;
// When a socket closes, remove its parked client (if any) from the wait
// list so we never try to write to a dead connection later.
server.on('connection', function(socket) {
	socket.on('close', function() {
		var client = socket.client;

		// Closing still counts as recent activity for the viewer count.
		mark_recently_seen(client.unique);
		delete sleeping_clients[client.request_id];