From: Steinar H. Gunderson Date: Sun, 18 Mar 2012 23:44:11 +0000 (+0100) Subject: Compute and calculate standard deviations. X-Git-Url: https://git.sesse.net/?p=wloh;a=commitdiff_plain;h=72a2eedd021d38d6bb0786033c6f100062363789;hp=dca2f8f927ae7bbe19e89d3d367992bf5f9ae294 Compute and calculate standard deviations. --- diff --git a/bayeswf.cpp b/bayeswf.cpp index 6214d28..6441801 100644 --- a/bayeswf.cpp +++ b/bayeswf.cpp @@ -16,6 +16,7 @@ using namespace std; #define DUMP_RAW 0 float mu[MAX_PLAYERS]; +float mu_stddev[MAX_PLAYERS]; float global_sigma = 70.0f; float prior_sigma = 70.0f; @@ -40,7 +41,7 @@ struct match { map<int, vector<match> > matches_for_player; vector<match> all_matches; -void dump_scores(const vector<string> &players, const float *mu, int num_players) +void dump_scores(const vector<string> &players, const float *mu, const float *mu_stddev, int num_players) { #if 0 for (int i = 0; i < num_players; ++i) { @@ -54,7 +55,7 @@ void dump_scores(const vector<string> &players, const float *mu, int num_players printf("\n"); #else for (int i = 0; i < num_players; ++i) { - printf("%f %s\n", mu[i], players[i].c_str()); + printf("%f %f %s\n", mu[i], mu_stddev[i], players[i].c_str()); } #endif } @@ -196,7 +197,7 @@ float compute_total_logl(float *mu, int num_players) * Note that this does not depend on mu or the margin at all. */ double hessian[MAX_PLAYERS][MAX_PLAYERS]; -void construct_hessian(const float *mu, const float *sigma, int num_players) +void construct_hessian(const float *mu, int num_players) { memset(hessian, 0, sizeof(hessian)); @@ -215,12 +216,29 @@ void construct_hessian(const float *mu, const float *sigma, int num_players) hessian[p1][p1] += w / sigma_sq; hessian[p2][p2] += w / sigma_sq; } +} + +// Compute uncertainty (stddev) of mu estimates, which is 1/sqrt(H_ii), +// where H is the Hessian (see construct_hessian()). 
+void compute_mu_uncertainty(const float *mu, int num_players) +{ + memset(mu_stddev, 0, sizeof(mu_stddev)); + + for (unsigned i = 0; i < all_matches.size(); ++i) { + const match &m = all_matches[i]; + + int p1 = m.player; + int p2 = m.other_player; + double sigma_sq = global_sigma * global_sigma; + float w = m.weight; + + // Temporarily use mu_stddev to store the diagonal of the Hessian. + mu_stddev[p1] += w / sigma_sq; + mu_stddev[p2] += w / sigma_sq; + } for (int i = 0; i < num_players; ++i) { - for (int j = 0; j < num_players; ++j) { - printf("%.12f ", hessian[i][j]); - } - printf("\n"); + mu_stddev[i] = 1.0f / sqrt(mu_stddev[i]); } } @@ -327,7 +345,8 @@ int main(int argc, char **argv) #if DUMP_RAW dump_raw(mu, num_players); #else - dump_scores(players, mu, num_players); + compute_mu_uncertainty(mu, num_players); + dump_scores(players, mu, mu_stddev, num_players); //fprintf(stderr, "Optimal sigma: %f (two-player: %f)\n", sigma[0], sigma[0] * sqrt(2.0f)); printf("%f -2\n", global_sigma / sqrt(2.0f)); printf("%f -3\n", prior_sigma); diff --git a/train.pl b/train.pl index 22a2741..67b2b8d 100755 --- a/train.pl +++ b/train.pl @@ -56,13 +56,13 @@ while (my $ref = $q->fetchrow_hashref) { close DATA; $dbh->do('DELETE FROM ratings'); -my $iq = $dbh->prepare('INSERT INTO ratings ( id, rating ) VALUES (?, ?)'); +my $iq = $dbh->prepare('INSERT INTO ratings ( id, rating, rating_stddev ) VALUES (?, ?, ?)'); open RATINGS, "$config::base_dir/bayeswf < $tmpnam |" or die "bayeswf: $!"; while (<RATINGS>) { - /(.*) (.*)/ or next; - $iq->execute($2, $1); + /(.*) (.*) (.*)/ or next; + $iq->execute($3, $1, $2); } $dbh->commit;