]> git.sesse.net Git - ccbs/blobdiff - parse/parse-wiki-countrylist.pl
Add a script (and output SQL) for getting the country list from Wikipedia.
[ccbs] / parse / parse-wiki-countrylist.pl
diff --git a/parse/parse-wiki-countrylist.pl b/parse/parse-wiki-countrylist.pl
new file mode 100644 (file)
index 0000000..fc0437b
--- /dev/null
@@ -0,0 +1,34 @@
+#! /usr/bin/perl
+use strict;
+use warnings;
+
+# Reads the wikitext of Wikipedia's list of IOC country codes (from the files
+# named on the command line, or stdin) and prints SQL that fills "countries":
+# http://en.wikipedia.org/w/index.php?title=List_of_IOC_country_codes&action=edit
+
+print "begin;\n";
+
+while (<>) {
+       # Only bullet lines of the form "* XXX - [[Article|Name]]" are of interest.
+       m/
+         ^ \* \s*
+           ( [A-Z]{3} )                  # country code
+           \s* - \s*
+           \[\[
+           (?: .*? \| )?                 # optional article lookup
+           ( .*? )                       # country name
+           \]\]
+        /x or next;
+
+       my ($countrycode, $countryname) = ($1, $2);
+
+       # strip any link brackets left inside the name
+       $countryname =~ s/ \[\[ | \]\] //gx;
+       # minimal SQL escaping: a quote inside a literal is written as two quotes
+       $countryname =~ s/'/''/g;
+
+       printf "INSERT INTO countries (countryname,countrycode) VALUES ('%s','%s');\n",
+               $countryname, $countrycode;
+}
+
+print "commit;\n";