From: Steinar H. Gunderson Date: Fri, 10 Nov 2023 21:42:34 +0000 (+0100) Subject: Speed up fuzzy searching on Slack a bit. X-Git-Url: https://git.sesse.net/?p=skvidarsync;a=commitdiff_plain;h=54e162c1a11f895ccdd10f255100f95c4de72fcb Speed up fuzzy searching on Slack a bit. --- diff --git a/bin/sync.pl b/bin/sync.pl index bfac355..49fea37 100644 --- a/bin/sync.pl +++ b/bin/sync.pl @@ -113,19 +113,22 @@ sub get_spreadsheet_name { } sub matches_name { - my ($slack_name, $spreadsheet_name) = @_; - if (sort_key($slack_name) eq sort_key($spreadsheet_name)) { - return 1; - } + my ($slack_name, $spreadsheet_name, $ap) = @_; + + # No need to check for an exact match; we already did that through $seen_names. + # if (sort_key($slack_name) eq sort_key($spreadsheet_name)) { + # return 1; + # } - my @ap = split /\s+/, $slack_name; - my @bp = split /\s+/, $spreadsheet_name; - if (scalar @ap >= 2 && scalar @bp >= 2 && sort_key($ap[0]) eq sort_key($bp[0])) { + # @ap is precalculated by the caller. + # my @ap = map { sort_key($_) } split /\s+/, $slack_name; + my @bp = map { sort_key($_) } split /\s+/, $spreadsheet_name; + if (scalar @$ap >= 2 && scalar @bp >= 2 && $ap->[0] eq $bp[0]) { # First name matches, try to match some surname my $found = 0; - for my $ai (1..$#ap) { + for my $ai (1..(scalar @$ap)) { for my $bi (1..$#bp) { - $found = 1 if (sort_key($ap[$ai]) eq sort_key($bp[$bi])); + $found = 1 if ($ap->[$ai] eq $bp[$bi]); } } if ($found) { @@ -630,14 +633,17 @@ sub run { # Do a search through all the available names in the sheet to find an obvious(ish) match. my @candidates = (); my $main_sheet_rows = $main_sheet_json->{'data'}[0]{'rowData'}; + $start = [Time::HiRes::gettimeofday]; + my @ap = map { sort_key($_) } split /\s+/, $slack_name; # Precalc for matches_name(). for my $row (@$main_sheet_rows) { for my $val (@{$row->{'values'}}) { my $name = get_spreadsheet_name($val); - if (defined($name) && matches_name($slack_name, $name)) { + if (defined($name) && matches_name($slack_name, $name, \@ap)) { push @candidates, $name; } } } + log_timing($start, "Fuzzy-searching for Slack name “$slack_name”"); if ($#candidates == -1) { skv_log("$slack_name ($userid) er påmeldt på Slack, men fant ikke et regneark-navn for dem."); possibly_nag_user($dbh, $ua, $userid, $invitation_ts, undef, \%slack_userid_to_slack_name);