git.sesse.net Git - skvidarsync/commitdiff
Speed up fuzzy searching on Slack a bit.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 10 Nov 2023 21:42:34 +0000 (22:42 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 10 Nov 2023 21:42:34 +0000 (22:42 +0100)
bin/sync.pl

index bfac355fe6dd87b2dcc8d0a937cda65a2221d29f..49fea375c2c548bf7f95310fd880f7e21749f6c9 100644 (file)
@@ -113,19 +113,22 @@ sub get_spreadsheet_name {
 }
 
 sub matches_name {
-       my ($slack_name, $spreadsheet_name) = @_;
-       if (sort_key($slack_name) eq sort_key($spreadsheet_name)) {
-               return 1;
-       }
+       my ($slack_name, $spreadsheet_name, $ap) = @_;
+
+       # No need to check for an exact match; we already did that through $seen_names.
+       # if (sort_key($slack_name) eq sort_key($spreadsheet_name)) {
+       #       return 1;
+       # }
 
-       my @ap = split /\s+/, $slack_name;
-       my @bp = split /\s+/, $spreadsheet_name;
-       if (scalar @ap >= 2 && scalar @bp >= 2 && sort_key($ap[0]) eq sort_key($bp[0])) {
+       # @ap is precalculated by the caller.
+       # my @ap = map { sort_key($_) } split /\s+/, $slack_name;
+       my @bp = map { sort_key($_) } split /\s+/, $spreadsheet_name;
+       if (scalar @$ap >= 2 && scalar @bp >= 2 && $ap->[0] eq $bp[0]) {
                # First name matches, try to match some surname
                my $found = 0;
-               for my $ai (1..$#ap) {
+               for my $ai (1..(scalar @$ap)) {
                        for my $bi (1..$#bp) {
-                               $found = 1 if (sort_key($ap[$ai]) eq sort_key($bp[$bi]));
+                               $found = 1 if ($ap->[$ai] eq $bp[$bi]);
                        }
                }
                if ($found) {
@@ -630,14 +633,17 @@ sub run {
                        # Do a search through all the available names in the sheet to find an obvious(ish) match.
                        my @candidates = ();
                        my $main_sheet_rows = $main_sheet_json->{'data'}[0]{'rowData'};
+                       $start = [Time::HiRes::gettimeofday];
+                       my @ap = map { sort_key($_) } split /\s+/, $slack_name;  # Precalc for matches_name().
                        for my $row (@$main_sheet_rows) {
                                for my $val (@{$row->{'values'}}) {
                                        my $name = get_spreadsheet_name($val);
-                                       if (defined($name) && matches_name($slack_name, $name)) {
+                                       if (defined($name) && matches_name($slack_name, $name, \@ap)) {
                                                push @candidates, $name;
                                        }
                                }
                        }
+                       log_timing($start, "Fuzzy-searching for Slack name “$slack_name”");
                        if ($#candidates == -1) {
                                skv_log("$slack_name ($userid) er påmeldt på Slack, men fant ikke et regneark-navn for dem.");
                                possibly_nag_user($dbh, $ua, $userid, $invitation_ts, undef, \%slack_userid_to_slack_name);