From 25b18d4430a0ba0b75b69180a41a39178e9020a8 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 18 Aug 2013 11:30:20 +0200 Subject: [PATCH 1/1] Initial commit. --- Mirror/AS.pm | 18 +++ Mirror/Math.pm | 36 +++++ README | 44 ++++++ redir.pl | 387 +++++++++++++++++++++++++++++++++++++++++++++++++ update.sh | 66 +++++++++ 5 files changed, 551 insertions(+) create mode 100644 Mirror/AS.pm create mode 100644 Mirror/Math.pm create mode 100644 README create mode 100755 redir.pl create mode 100755 update.sh diff --git a/Mirror/AS.pm b/Mirror/AS.pm new file mode 100644 index 0000000..260304d --- /dev/null +++ b/Mirror/AS.pm @@ -0,0 +1,18 @@ +package Mirror::AS; + +use strict; +use warnings; + +sub convert { + my $as = shift; + + $as =~ s/^AS//; + + if ($as =~ m/(\d+)\.(\d+)/) { + $as = unpack('N', pack('nn', $1, $2)); + } + + return $as; +} + +1; diff --git a/Mirror/Math.pm b/Mirror/Math.pm new file mode 100644 index 0000000..963387e --- /dev/null +++ b/Mirror/Math.pm @@ -0,0 +1,36 @@ +package Mirror::Math; + +use strict; +use warnings; +use POSIX; + +sub stddev { + my @elems = @_; + my ($avg, $var) = (0, 0); + + return 0 if (scalar(@elems) == 1); + + for (@elems) { + $avg += $_; + } + $avg /= scalar(@elems); + + for (@elems) { + $var += ($_-$avg)**2; + } + $var /= scalar(@elems)-1; + + return sqrt($var); +} + +sub iquartile(@) { + my @elems = @_; + my ($lower, $upper) = (0.25 * $#elems, 0.75 * $#elems); + + $lower = POSIX::ceil($lower); + $upper = POSIX::ceil($upper); + + return @elems[$lower..$upper]; +} + +1; diff --git a/README b/README new file mode 100644 index 0000000..ce2bdc4 --- /dev/null +++ b/README @@ -0,0 +1,44 @@ +Video mirror redirector +======================= + +This fulfills a simple need: Redirecting people on to the closest mirror +of many. It was specifically written for Debconf 13, but should be useable +for any kind of dumb HTTP mirror. + +It is based on http.debian.net (https://github.com/rgeissert/http-redirector), +but heavily modified, since we do not need most of what a Debian mirror needs. +You will most likely need some local tweaking of e.g. the mirror list; things +are a lot more hard-coded than in the original + +Getting started +=============== + +Required packages: + libcgi-simple-perl + libgeo-ip-perl + libwww-perl + wget + +Run ./update.sh, it will download the geoip databases, the mirrors +list, build the database used by the redirector, and check the mirrors +for errors. + +Check the first lines of redir.pl for the invocation (or look at the +example below.) + +Real life testing +================= + +Here's the Apache snippet we used during Debconf 13: + + AddHandler cgi-script .pl + Options +ExecCGI + + RewriteEngine On + RewriteRule ^/(.*)$ /redir.pl?url=$1 [L] + +Credits +======= + +"This product includes GeoLite data created by MaxMind, available from +http://maxmind.com/" diff --git a/redir.pl b/redir.pl new file mode 100755 index 0000000..173e679 --- /dev/null +++ b/redir.pl @@ -0,0 +1,387 @@ +#!/usr/bin/perl -w + +# Redirector for hires.video.debconf.org; redirects the viewer to the +# closest reflector geographically/network-wise. +# Based on the http.debian.net code; heavily modified for the needs of the +# Debconf redirector. + +#################### +# Copyright (C) 2011, 2012 by Raphael Geissert +# Copyright (C) 2013 by Steinar H. Gunderson +# +# This file is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this file If not, see . +# +# On Debian systems, the complete text of the GNU General +# Public License 3 can be found in '/usr/share/common-licenses/GPL-3'. +#################### + +use strict; +use warnings; +use lib '.'; + +my @ipv4_mirrors = ( + '117.121.243.49', + '193.35.52.50', + '46.4.205.40', + '81.7.11.163', # pkern (DE) + '106.187.92.155', +); +my @ipv6_mirrors = ( + '2001:67c:29f4::50', + '2400:8900::f03c:91ff:feae:fac8', + '2a01:4f8:131:1528:deb::23', + '2a02:180:1:1::517:ba3', # pkern (DE) +); +my @all_mirrors = ( + @ipv4_mirrors, @ipv6_mirrors +); + +# Usage: redir.pl?url=/foo1.ts +# Test (make sure -debug1 is set below): +# ./redir.pl +# REMOTE_ADDR=1.2.3.4 ./redir.pl +use CGI::Simple qw(-debug1); +$CGI::Simple::POST_MAX = 0; +$CGI::Simple::DISABLE_UPLOADS = 1; +my $q = new CGI::Simple; + +my $request_method = $q->request_method() || 'HEAD'; +# abort POST and other requests ASAP +if ($request_method ne 'GET' && $request_method ne 'HEAD') { + print "Status: 405 Not Allowed\r\n"; + print "Allow: GET, HEAD\r\n\r\n"; + exit; +} + +use Geo::IP; +use Math::Trig; +use Mirror::Math; +use Mirror::AS; + +our $metric = ''; # alt: taxicab (default) | euclidean +our $xtra_headers = 1; +my $random_sort = 1; + +my %nearby_continents = ( + 'AF' => [ qw(EU NA AS SA OC) ], + 'SA' => [ qw(NA EU OC AS AF) ], + 'OC' => [ qw(NA AS EU SA AF) ], + 'AS' => [ qw(EU NA OC SA AF) ], + 'NA' => [ qw(EU AS OC SA AF) ], + 'EU' => [ qw(NA AS SA OC AF) ], +); + +my %nearby_country = ( + 'NA' => [ qw(US CA) ], +); + +sub print_xtra($$); +sub clean_url($); +sub consider_mirror($); +sub url_for_mirror($); +sub geolocate($); + +my @output; +my $action = 'redir'; + +unless ($request_method eq 'HEAD') { + $xtra_headers = 0; +} else { + print "Vary: x-web-demo\r\n"; +} + +$action = 'demo' if (exists($ENV{'HTTP_X_WEB_DEMO'})); + +#### +my $IP = $ENV{'HTTP_X_FORWARDED_FOR'}; +if (!defined($IP)) { + $IP = $q->remote_addr; +} +$IP =~ s/,.*//; +print STDERR "IP: $IP\n"; +#$IP = `wget -O- -q http://myip.dnsomatic.com/` if ($IP eq '127.0.0.1'); +#### + +my $url = clean_url($q->param('url') || ''); + +my $client_geoloc = geolocate($IP); +print_xtra('IP', $client_geoloc->{ip}); +print_xtra('AS', $client_geoloc->{as}); +print_xtra('URL', $url); +print_xtra('Country', $client_geoloc->{country}); +print_xtra('Continent', $client_geoloc->{continent}); + +# Geolocate all the mirrors +our $server_geoloc = {}; +for my $mirror (@all_mirrors, '80.83.52.180') { + $server_geoloc->{$mirror} = geolocate($mirror); +} + +my @mirrors; +if ($IP =~ /:/) { + @mirrors = @ipv6_mirrors; +} else { + @mirrors = @ipv4_mirrors; +} + +our %hosts; +my $match_type = ''; + +# special override for debconf ranges (ick) +if ($IP =~ /^83\.68\.21[6789]\./ || $IP =~ /^80\.83\.(48|49|50|51)\./) { + $match_type = 'Debconf-override'; + consider_mirror('80.83.52.180'); +} elsif ($IP =~ /^2001:41b8:203:/) { + $match_type = 'Debconf-override-v6-to-v4'; + consider_mirror('80.83.52.180'); # no internal IPv6 address +} + +# match by AS +if (!$match_type) { + for my $mirror (@mirrors) { + next unless ($server_geoloc->{$mirror}{as} eq $client_geoloc->{as}); + next unless ($server_geoloc->{$mirror}{continent} eq $client_geoloc->{continent}); + + $match_type = 'AS'; + consider_mirror($mirror); + } +} + +# match by AS peer +# if (!$match_type && $as && $peers_db_store && !$ipv6 && -f $peers_db_store) { +# my $peers_db = retrieve($peers_db_store); +# +# foreach my $match (keys %{$peers_db->{$as}}) { +# next unless (exists($db->{'all'}{$match}{$mirror_type.'-http'})); +# next unless (mirror_is_in_continent($rdb, $match, $continent)); +# +# $match_type = 'AS-peer' if (consider_mirror ($match)); +# } +# } + +# match by country +if (!$match_type) { + for my $mirror (@mirrors) { + next unless ($server_geoloc->{$mirror}{country} eq $client_geoloc->{country}); + $match_type = 'country'; + consider_mirror($mirror); + } +} + +# match by nearby-country +my $continent = $client_geoloc->{continent}; +if (!$match_type && exists($nearby_country{$continent})) { + for my $country (@{$nearby_country{$continent}}) { + for my $mirror (@mirrors) { + next unless ($server_geoloc->{$mirror}{country} eq $country); + $match_type = 'nearby-country'; + consider_mirror($mirror); + } + } +} + +# match by continent +if (!$match_type) { + my @continents = ($continent, @{$nearby_continents{$continent}}); + + for my $mirror_continent (@continents) { + last if ($match_type); + + my $mtype; + if ($mirror_continent eq $continent) { + $mtype = 'continent'; + } else { + $mtype = 'nearby-continent'; + } + + for my $mirror (@mirrors) { + next unless ($server_geoloc->{$mirror}{continent} eq $mirror_continent); + $match_type = $mtype; + consider_mirror($mirror); + } + } +} + +# something went awry, we don't know how to handle this user, we failed +if (!$match_type) { + print "Status: 503 Service Unavailable\r\n\r\n"; + exit; +} + + +my @sorted_hosts = sort { $hosts{$a} <=> $hosts{$b} } keys %hosts; +my @close_hosts; +my $dev; + +my @iq_dists = map { $hosts{$_} } Mirror::Math::iquartile(@sorted_hosts); +$dev = Mirror::Math::stddev(@iq_dists); + +# Closest host (or one of many), to use as the base distance +my $host = $sorted_hosts[0]; + +print_xtra('Std-Dev', $dev); +print_xtra('Population', scalar(@sorted_hosts)); +print_xtra('Closest-Distance', $hosts{$host}); + +for my $h (@sorted_hosts) { + # NOTE: this might need some additional work, as we should probably + # guarantee a certain amount of alt hosts to choose from + if (($hosts{$h} - $hosts{$host}) <= $dev && + (scalar(@close_hosts) < 4 || $hosts{$h} == $hosts{$close_hosts[-1]})) { + push @close_hosts, $h; + } else { + # the list is sorted, if we didn't accept this one won't accept + # the next + last; + } +} + +if ($random_sort) { + my $n = int(rand scalar(@close_hosts)); + $host = $close_hosts[$n]; +} +print_xtra('Distance', $hosts{$host}); +print_xtra('Match-Type', $match_type); + +if ($action eq 'redir') { + print "Content-type: text/plain\r\n"; + print "Status: 302 Moved Temporarily\r\n"; + + my $port = 9094; + if ($host eq '2001:67c:29f4::50' || $host eq '193.35.52.50') { + $port = 9095; + } + if ($host =~ /:/) { + print "Location: http://[".$host."]:$port/".$url."\r\n"; + } else { + print "Location: http://".$host.":$port/".$url."\r\n"; + } +} elsif ($action eq 'demo') { + print "Status: 200 OK\r\n"; + print "Cache-control: no-cache\r\n"; + print "Pragma: no-cache\r\n"; +} elsif ($action eq 'list') { + print "Content-type: text/plain\r\n"; + print "Status: 200 OK\r\n"; + for my $host (@close_hosts) { + push @output, url_for_mirror($host)."\n"; + } +} else { + die("FIXME: unknown action '$action'"); +} + +print "\r\n"; + +for my $line (@output) { + print $line; +} + +exit; + +sub print_xtra($$) { + print "X-$_[0]: $_[1]\r\n" + if ($xtra_headers); +} + +sub clean_url($) { + my $url = shift; + $url =~ s,//,/,g; + $url =~ s,^/,,; + $url =~ s,^\.\.?/,,g; + $url =~ s,(?<=/)\.\.?(?:/|$),,g; + $url = CGI::Simple->url_encode($url); + $url =~ s,%2F,/,g; + return $url; +} + +sub consider_mirror($) { + my ($ip) = @_; + + my $geoloc = $server_geoloc->{$ip}; + + my $pihalf = Math::Trig::pi / 2; + my $lon1 = Math::Trig::deg2rad($geoloc->{'lon'}); + my $lat1 = $pihalf - Math::Trig::deg2rad($geoloc->{'lat'}); + my $lon2 = Math::Trig::deg2rad($client_geoloc->{'lon'}); + my $lat2 = $pihalf - Math::Trig::deg2rad($client_geoloc->{'lat'}); + + $hosts{$ip} = Math::Trig::great_circle_distance($lon1, $lat1, $lon2, $lat2); +} + +sub geolocate($) { + my ($IP) = @_; + + our $ipv6 = ($IP =~ m/:/); + + # Handle IPv6 over IPv4 requests as if they originated from an IPv4 + if ($ipv6 && $IP =~ m/^200(2|1(?=:0:)):/) { + my $tunnel_type = $1; + $ipv6 = 0; + print_xtra('IPv6', $IP); + + if ($tunnel_type == 1) { # teredo + $IP =~ m/:(\w{0,2}?)(\w{1,2}):(\w{0,2}?)(\w{1,2})$/ or die; + $IP = join('.', hex($1)^0xff, hex($2)^0xff, hex($3)^0xff, hex($4)^0xff); + } elsif ($tunnel_type == 2) { # 6to4 + $IP =~ m/^2002:(\w{0,2}?)(\w{1,2}):(\w{0,2}?)(\w{1,2}):/ or die; + $IP = join('.', hex($1), hex($2), hex($3), hex($4)); + } + } + + my ($g_city, $g_as); + my ($geo_rec, $as); + our ($lat, $lon); + + if (!$ipv6) { + $g_city = Geo::IP->open('geoip/GeoLiteCity.dat', GEOIP_MMAP_CACHE); + $g_as = Geo::IP->open('geoip/GeoIPASNum.dat', GEOIP_MMAP_CACHE); + + $geo_rec = $g_city->record_by_addr($IP); + ($as) = split /\s+/, ($g_as->org_by_addr($IP) || ' '); + } else { + $g_city = Geo::IP->open('geoip/GeoLiteCityv6.dat', GEOIP_MMAP_CACHE); + $g_as = Geo::IP->open('geoip/GeoIPASNumv6.dat', GEOIP_MMAP_CACHE); + + $geo_rec = $g_city->record_by_addr_v6($IP); + ($as) = split /\s+/, ($g_as->org_by_addr_v6($IP) || ' '); + } + + if (!defined($geo_rec)) { + return { + ip => $IP, + as => 0, + country => '--', + continent => 'EU', + lon => 0.0, + lat => 0.0, + }; + } + + $as = Mirror::AS::convert($as); + + $lat = $geo_rec->latitude; + $lon = $geo_rec->longitude; + + my $continent = $geo_rec->continent_code; + $continent = 'EU' if ($continent eq '--'); + + return { + ip => $IP, + as => $as, + country => $geo_rec->country_code, + continent => $continent, + lon => $lon, + lat => $lat, + }; +} diff --git a/update.sh b/update.sh new file mode 100755 index 0000000..8af1ee3 --- /dev/null +++ b/update.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +#################### +# Copyright (C) 2011, 2012 by Raphael Geissert +# +# This file is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this file If not, see . +# +# On Debian systems, the complete text of the GNU General +# Public License 3 can be found in '/usr/share/common-licenses/GPL-3'. +#################### + +set -eu + +compression=gz +if which unxz >/dev/null; then + compression=xz +fi + +mkdir -p geoip +cd geoip +for db in asnum/GeoIPASNum.dat.gz GeoLiteCity.dat.$compression asnum/GeoIPASNumv6.dat.gz GeoLiteCityv6-beta/GeoLiteCityv6.dat.gz; do + wget -U '' -N http://geolite.maxmind.com/download/geoip/database/$db + db="$(basename "$db")" + case "$db" in + *.gz|*.xz) + file_comp="${db##*.}" + ;; + *) + echo "error: unknown compression of file $db" >&2 + exit 1 + ;; + esac + + decomp_db="${db%.$file_comp}" + if [ -f $decomp_db ]; then + [ $db -nt $decomp_db ] || continue + fi + rm -f new.$db + ln $db new.$db + case "$file_comp" in + gz) + gunzip -f new.$db + ;; + xz) + unxz -f new.$db + ;; + *) + echo "error: unknown decompressor for .$file_comp" >&2 + exit 1 + ;; + esac + mv new.$decomp_db $decomp_db + touch -r $db $decomp_db +done +cd - >/dev/null -- 2.39.2