X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bin%2Fsync.pl;h=2d315c9d2a646f7448d82b03db5f65b946f02ff4;hb=20a638d8c60944723c746ff33dad7cc4f7314439;hp=3c6d7743236783efca1ef1db4068f2e2cda752ab;hpb=72138aef53652b931e136963586dd505928e5af7;p=skvidarsync diff --git a/bin/sync.pl b/bin/sync.pl index 3c6d774..2d315c9 100644 --- a/bin/sync.pl +++ b/bin/sync.pl @@ -52,9 +52,28 @@ sub log_timing { printf "%s: %.0f ms.\n", $msg, 1e3 * $elapsed; } +# Unicode::Collate is seemingly slow, so add a cache for each name part +# (which, of course, only works for equality). Helps especially in +# --daemon mode, where even the first request gets a warm cache. +my %sort_key_cache = (); +my $sort_key_sp = $uca->getSortKey(' '); + sub sort_key { my $m = shift; - return $uca->getSortKey($m); + my $sk; + for my $part (split /\s+/, $m) { + my $psk = \$sort_key_cache{$part}; + if (!defined($$psk)) { + $$psk = $uca->getSortKey($part); + } + if (defined($sk)) { + $sk .= $sort_key_sp; + $sk .= $$psk; + } else { + $sk = $$psk; + } + } + return $sk; } sub get_oauth_bearer_token { @@ -132,7 +151,7 @@ sub matches_name { if (scalar @$ap >= 2 && scalar @bp >= 2 && $ap->[0] eq $bp[0]) { # First name matches, try to match some surname my $found = 0; - for my $ai (1..(scalar @$ap)) { + for my $ai (1..(scalar @$ap - 1)) { for my $bi (1..$#bp) { $found = 1 if ($ap->[$ai] eq $bp[$bi]); } @@ -235,7 +254,6 @@ sub get_group_assignments { sub update_assignment_db { my ($dbh, $channel, $ts, $assignments) = @_; - local $dbh->{AutoCommit} = 0; my %db_assignments = (); my $q = $dbh->prepare('SELECT name,group_name FROM current_group_membership_history WHERE channel=? AND ts=?'); $q->execute($channel, $ts); @@ -256,7 +274,6 @@ sub update_assignment_db { $q->execute($channel, $ts, $name, undef); } } - $dbh->commit; } sub get_spreadsheet_with_title { @@ -415,10 +432,11 @@ sub find_diff { } for my $real_name (keys %$have_colors) { next if (exists($want_colors->{$real_name})); - if (!exists($seen_names->{sort_key($real_name)})) { + my $sk = sort_key($real_name); + if (!exists($seen_names->{$sk})) { # TODO: This can somehow come if we try to add someone who's not in the sheet, too? skv_log("Ønsket å fjerne at $real_name skulle på trening, men de var ikke i regnearket lenger."); - } elsif (scalar @{$seen_names->{sort_key($real_name)}} > 1) { + } elsif (scalar @{$seen_names->{$sk}} > 1) { # Don't touch them. } else { skv_log("Fjerner at $real_name skal på trening."); @@ -483,6 +501,7 @@ sub db_connect { if (!defined($dbh)) { return undef; } + $dbh->{AutoCommit} = 0; $dbh->do('LISTEN skvupdate') or return undef; return $dbh; } @@ -494,6 +513,9 @@ sub run { @log = (); skv_log("Siste sync startet: " . POSIX::ctime(time)); + # For the logic on the “applied” table below. + $dbh->do('SET TRANSACTION ISOLATION LEVEL SERIALIZABLE'); + my $token = get_oauth_bearer_token($dbh, $ua); # Find the newest message, what it is linked to, and what was the one before it (for group diffing). @@ -557,12 +579,19 @@ sub run { log_timing($start, "/spreadsheets/"); my $sheets_json = JSON::XS::decode_json($response->decoded_content); + if (!exists($sheets_json->{'sheets'})) { + die "Missing sheets (error response?): " . $response->decoded_content; + } my $main_sheet_json = $sheets_json->{'sheets'}[0]; my $mapping_sheet_json = $sheets_json->{'sheets'}[1]; # Update the list of groups we've seen people in. + $start = [Time::HiRes::gettimeofday]; my %assignments = get_group_assignments($main_sheet_json); + log_timing($start, "Parsing group assignments"); + $start = [Time::HiRes::gettimeofday]; update_assignment_db($dbh, $config::invitation_channel, $invitation_ts, \%assignments); + log_timing($start, "Updating assignments in database"); $start = [Time::HiRes::gettimeofday]; my %seen_names = find_where_each_name_is($main_sheet_json); @@ -649,7 +678,7 @@ sub run { } } } - log_timing($start, "Fuzzy-searching for Slack name “$slack_name”"); + log_timing($start, "Fuzzy-searching for Slack name $slack_name"); if ($#candidates == -1) { skv_log("$slack_name ($userid) er påmeldt på Slack, men fant ikke et regneark-navn for dem."); possibly_nag_user($dbh, $ua, $userid, $invitation_ts, undef, \%slack_userid_to_slack_name); @@ -705,11 +734,12 @@ sub run { my $real_name = $slack_userid_to_real_name{$userid}; # See if we can find them in the spreadsheet. - if (!exists($seen_names{sort_key($real_name)})) { + my $sk = sort_key($real_name); + if (!exists($seen_names{$sk})) { # TODO: Perhaps move this logic further down, for consistency? skv_log("$slack_name ($userid) er påmeldt på Slack, og er mappet til $real_name, men var ikke i noen gruppe."); } else { - my $seen = $seen_names{sort_key($real_name)}; + my $seen = $seen_names{$sk}; if (scalar @$seen >= 2) { skv_log("$slack_name ($userid) er påmeldt på Slack, men står flere steder (se over); vet ikke hvilken celle som skal brukes."); } else { @@ -720,8 +750,6 @@ sub run { # Find the list of names we already marked yellow. my %have_colors = (); - $dbh->{AutoCommit} = 0; - $dbh->do('SET TRANSACTION ISOLATION LEVEL SERIALIZABLE'); $q = $dbh->prepare('SELECT name,color FROM applied WHERE channel=? AND ts=?'); $q->execute($config::invitation_channel, $invitation_ts); while (my $ref = $q->fetchrow_hashref) { @@ -803,6 +831,7 @@ sub run { my $elapsed = Time::HiRes::tv_interval($total_start); printf "Tok %.0f ms.\n", 1e3 * $elapsed; + print "\n"; } # Initialize the handles we need for communication. @@ -813,13 +842,13 @@ if ($#ARGV >= 0 && $ARGV[0] eq '--daemon') { run($dbh, $ua); while (1) { - while (!defined($dbh)) { + while (!defined($dbh) || !$dbh->ping) { print STDERR "Database connection lost, reconnecting...\n"; sleep 1; $dbh = db_connect(); } my $s = IO::Select->new($dbh->{pg_socket}); - my @ready = $s->can_read(10.0); + my @ready = $s->can_read(150.0); # slack.com HTTP timeout is ~3 minutes, sheets.googleapis.com is ~4 minutes. my @exceptions = $s->has_exception(0.0); if (scalar @exceptions > 0) { @@ -829,21 +858,31 @@ if ($#ARGV >= 0 && $ARGV[0] eq '--daemon') { } if (scalar @ready > 0) { eval { - $dbh->{AutoCommit} = 1; run($dbh, $ua); - $dbh->commit; }; if ($@) { warn "Died with: $@"; $dbh = undef; } + } else { + # Keep the connections alive and the token in the database fresh. + # (The two URLs we use don't really exist. Note that the first time, + # we might be making the initial connection to slack.com, since it's + # not a given that run() needed to talk to them.) + get_oauth_bearer_token($dbh, $ua); + $dbh->commit; + #my $start = [Time::HiRes::gettimeofday]; + $ua->get('https://sheets.googleapis.com/ping'); + #log_timing($start, 'sheets.googleapis.com (keepalive)'); + #$start = [Time::HiRes::gettimeofday]; + $ua->get('https://slack.com/api/ping'); + #log_timing($start, 'slack.com (keepalive)'); + #print STDERR "\n"; } } } elsif ($#ARGV >= 0 && $ARGV[0] eq '--benchmark') { for my $i (0..9) { - $dbh->{AutoCommit} = 1; run($dbh, $ua); - $dbh->commit; } } else { run($dbh, $ua);