From c76af731cb57536c8dd6433f1c6418a7906bdd41 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 9 Feb 2012 23:00:30 +0100 Subject: [PATCH] Added a script to parse the HTML from www.positivegaming.com. --- parse/parse-ccbs-tournament.pl | 206 +++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 parse/parse-ccbs-tournament.pl diff --git a/parse/parse-ccbs-tournament.pl b/parse/parse-ccbs-tournament.pl new file mode 100644 index 0000000..29aadf9 --- /dev/null +++ b/parse/parse-ccbs-tournament.pl @@ -0,0 +1,206 @@ +#! /usr/bin/perl +use strict; +use warnings; +use Date::Manip; + +# Parses tournament results from our own HTML (!). + +my ($season, $country, $location, $date, $mix, $system); +my ($name, $round, $group, $player, $position, $chosensongs, @rsongs); + +print "begin;\n"; + +while (<>) { + if (/\s* (.*?) \s* <\/h1>/x) { + $name = $1; + next; + } + if (/
  • Season: \s* (.*?) <\/li>/x) { + $season = $1; + $season =~ s/Standalone/FrittstÃ¥ende/; + $season =~ s/Norwegian national championship/Norgesmesterskap/; + next; + } + if (/
  • Country: \s* (.*?) <\/li>/x) { + $country = $1; + next; + } + if (/
  • Place: \s* (.*?) <\/li>/x) { + $location = $1; + next; + } + if (/
  • Date: \s* (.*?) <\/li>/x) { + $date = $1; + next; + } + if (/
  • Machine: \s* (.*?) <\/li>/x) { + $mix = $1; + next; + } + if (/
  • Scoring \s* system: \s* (.*?) <\/li>/x) { + $system = $1; + + print "INSERT INTO tournaments \n"; + print " (season, tournamentname, country, location, \"date\", machine, scoringsystem) VALUES (\n"; + print " (SELECT season FROM seasons WHERE seasonname='$season' ),\n"; + print " '$name',\n"; + print " (SELECT country FROM countries WHERE countryname='$country'),\n"; + print " '$location',\n"; + printf " '%s',\n", Date::Manip::UnixDate($date, '%Y-%m-%d'); + print " (SELECT machine FROM machines WHERE machinename='$mix'),\n"; + print " (SELECT scoringsystem FROM scoringsystems WHERE scoringsystemname='$system')\n"; + print ");\n"; + + next; + } + + # Player rankings + if (/^ \s*
  • ( \d+) \. \s* place: \s* ( \S+ ) (?: \s* \( ( [A-Z]+ ) \) )? <\/a> , \s* with \s* ( \d+ ) \s* points/x) { + my ($ranking, $nick, $code, $points) = ($1, $2, $3, $4); + + # Woot, evil + print "INSERT INTO players (player, nick, country, club) \n"; + print "WITH code_lookup AS ( \n"; + print " SELECT ( SELECT country FROM countries WHERE countryname='Norway' ) AS country, club FROM clubs WHERE clubcode='$code'\n"; + print " UNION ALL SELECT country, NULL FROM countries WHERE countrycode='$code' \n"; + print " UNION ALL SELECT country, NULL FROM countries WHERE countrycode='NOR' \n"; + print ") \n"; + print " SELECT nextval('players_player_seq'), \n"; + print " '$nick', \n"; + print " ( SELECT country FROM code_lookup LIMIT 1 ), \n"; + print " ( SELECT club FROM code_lookup LIMIT 1 ) \n"; + print "WHERE '$nick' NOT IN ( SELECT nick FROM players );\n"; + + print "INSERT INTO tournamentrankings (tournament, ranking, player, points) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $ranking,\n"; + print " (SELECT player FROM players WHERE nick='$nick'),\n"; + print " $points);\n"; + } + + # New round + if (/

    Runde \s* (\d+) \s* <\/a><\/h2\>/x) { + $round = $1; + + print "INSERT INTO rounds (tournament, round, randomsongs, chosensongs) \n"; + print " VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round,\n"; + print " 0, 0);\n"; # Don't worry, we'll fix it later :-P + } + + # New group + if (//x) { + $group = $1; + $position = 1; + @rsongs = (); + $chosensongs = 0; + + print "INSERT INTO groups (tournament, round, parallel) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round, $group);\n"; + } + + # New random song + if (/ ( .*?) <\/a><\/th>/x) { + my $song = $1; + $song =~ s/'/''/g; + push @rsongs, $song; + + print "INSERT INTO roundrandomsongs (tournament, round, parallel, song) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round, $group,\n"; + print " (SELECT song FROM songs WHERE title='$song')\n"; + print ");\n"; + + # Correct the random songs in the table + printf "UPDATE rounds SET randomsongs=%u WHERE \n", scalar @rsongs; + print " tournament=(SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )) AND \n"; + print " round=$round;\n"; + } + + # New self-selected song + if (/Self-selected<\/th>/x) { + $chosensongs++; + printf "UPDATE rounds SET chosensongs=$chosensongs WHERE \n"; + print " tournament=(SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )) AND \n"; + print " round=$round;\n"; + } + + # Player's results (header) + if (/^\s* (\S+) (?: \s* \( ( [A-Z]+ ) \) )? <\/a> \s*$/x) { + $player = $1; + + printf "INSERT INTO roundparticipation (tournament, round, parallel, player, position) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round, $group,\n"; + print " (SELECT player FROM players WHERE nick='$player'),\n"; + print " $position\n"; + print ");\n"; + ++$position; + + chomp ($_ = <>); + die unless $_ =~ /<\/th>/; + + # Self-selected songs + for my $i (1..scalar @rsongs) { + chomp ($_ = <>); + die unless $_ =~ / (\d+) <\/td>/x; + my $score = $1; + + chomp ($_ = <>); + die unless $_ =~ / (\S+) <\/td>/x; + my $playmode = $1; + + chomp ($_ = <>); + die unless $_ =~ / (\S+) <\/td>/x; + my $difficulty = $1; + + printf "INSERT INTO scores (tournament, round, parallel, player, songnumber, song, playmode, difficulty, chosen, score) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round, $group,\n"; + print " (SELECT player FROM players WHERE nick='$player'),\n"; + print " $i,\n"; + print " (SELECT song FROM songs WHERE title='$rsongs[$i-1]'),\n"; + print " '$playmode',\n"; + print " '$difficulty',\n"; + print " 'f',\n"; + print " $score);\n"; + } + + # Chosen songs + for my $i (1..$chosensongs) { + my $songnumber = $i + scalar @rsongs; + + chomp ($_ = <>); + die unless $_ =~ / (\d+) <\/td>/x; + my $score = $1; + + chomp ($_ = <>); + die unless $_ =~ / (\S+) <\/td>/x; + my $playmode = $1; + + chomp ($_ = <>); + die unless $_ =~ / (\S+) <\/td>/x; + my $difficulty = $1; + + chomp ($_ = <>); + die unless $_ =~ / (.*?) <\/a><\/td>/x; + my $song = $1; + $song =~ s/'/''/g; + + printf "INSERT INTO scores (tournament, round, parallel, player, songnumber, song, playmode, difficulty, chosen, score) VALUES (\n"; + print " (SELECT tournament FROM tournaments WHERE tournamentname='$name' AND season=( SELECT season FROM seasons WHERE seasonname='$season' )),\n"; + print " $round, $group,\n"; + print " (SELECT player FROM players WHERE nick='$player'),\n"; + print " $songnumber,\n"; + print " (SELECT song FROM songs WHERE title='$song'),\n"; + print " '$playmode',\n"; + print " '$difficulty',\n"; + print " 't',\n"; + print " $score);\n"; + } + } +} + +printf "commit;\n"; -- 2.39.2