X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bayeswf.cpp;h=eee6882e4e18f59dcc9c2b8f37f950e22b3d6e13;hb=fde909c294de9806dd6337f5acb0ed87c41557c6;hp=7ee0120a592e23f1075ebef4cfc1a7cb36fbec47;hpb=d9a172a3b90abf4bbb174a432b6e63447942b14a;p=wloh diff --git a/bayeswf.cpp b/bayeswf.cpp index 7ee0120..eee6882 100644 --- a/bayeswf.cpp +++ b/bayeswf.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include @@ -9,16 +11,38 @@ #include using namespace std; +using namespace Eigen; -#define PRIOR_MU 1500 +#define PRIOR_MU 500 #define PRIOR_WEIGHT 1.0 #define MAX_PLAYERS 4096 #define DUMP_RAW 0 +#define USE_DB 1 + +#if USE_DB +#include +#include +#include +#endif float mu[MAX_PLAYERS]; float mu_stddev[MAX_PLAYERS]; -float global_sigma = 70.0f; -float prior_sigma = 70.0f; +float global_sigma; +float prior_sigma; + +// Data waiting for insertion into the database. + +struct RatingDBTuple { + int player; + float mu, mu_stddev; +}; +struct CovarianceDBTuple { + int player1, player2; + float covariance; +}; +vector rating_db_tuples; +vector covariance_db_tuples; +map, float> aux_params; #define EPSILON 1e-3 @@ -41,21 +65,16 @@ struct match { map > matches_for_player; vector all_matches; -void dump_scores(const vector &players, const float *mu, const float *mu_stddev, int num_players) +void dump_scores(const vector &players, const float *mu, const float *mu_stddev, int num_players) { -#if 0 - for (int i = 0; i < num_players; ++i) { - printf("%s=[%5.1f, %4.1f] ", players[i].c_str(), mu[i], sigma[i]); - } - printf("\n"); -#elif 0 +#if USE_DB for (int i = 0; i < num_players; ++i) { - printf("%5.1f ", mu[i]); + RatingDBTuple tuple = { players[i], mu[i], mu_stddev[i] }; + rating_db_tuples.push_back(tuple); } - printf("\n"); #else for (int i = 0; i < num_players; ++i) { - printf("%f %f %s\n", mu[i], mu_stddev[i], players[i].c_str()); + printf("%f %f %d\n", mu[i], mu_stddev[i], players[i]); } #endif } @@ -196,11 +215,15 @@ float compute_total_logl(float *mu, int num_players) * * Note that this does not depend on mu or the margin at all. */ -double hessian[MAX_PLAYERS][MAX_PLAYERS]; +Matrix hessian; void construct_hessian(const float *mu, int num_players) { - memset(hessian, 0, sizeof(hessian)); + hessian = Matrix(num_players, num_players); + hessian.fill(0.0f); + for (int i = 0; i < num_players; ++i) { + hessian(i, i) += 1.0f / (prior_sigma * prior_sigma); + } for (unsigned i = 0; i < all_matches.size(); ++i) { const match &m = all_matches[i]; @@ -210,42 +233,67 @@ void construct_hessian(const float *mu, int num_players) double sigma_sq = global_sigma * global_sigma; float w = m.weight; - hessian[p1][p2] -= w / sigma_sq; - hessian[p2][p1] -= w / sigma_sq; + hessian(p1, p2) -= w / sigma_sq; + hessian(p2, p1) -= w / sigma_sq; - hessian[p1][p1] += w / sigma_sq; - hessian[p2][p2] += w / sigma_sq; + hessian(p1, p1) += w / sigma_sq; + hessian(p2, p2) += w / sigma_sq; } } -// Compute uncertainty (stddev) of mu estimates, which is 1/sqrt(H_ii), +// Compute uncertainty (stddev) of mu estimates, which is sqrt((H^-1)_ii), // where H is the Hessian (see construct_hessian()). -void compute_mu_uncertainty(const float *mu, int num_players) +void compute_mu_uncertainty(const float *mu, const vector &players) { - memset(mu_stddev, 0, sizeof(mu_stddev)); - - for (unsigned i = 0; i < all_matches.size(); ++i) { - const match &m = all_matches[i]; - - int p1 = m.player; - int p2 = m.other_player; - - double sigma_sq = global_sigma * global_sigma; - float w = m.weight; + // FIXME: Use pseudoinverse if applicable. + Matrix ih = hessian.inverse(); + for (unsigned i = 0; i < players.size(); ++i) { + mu_stddev[i] = sqrt(ih(i, i)); + } - // Temporarily use mu_stddev to store the diagonal of the Hessian. - mu_stddev[p1] += w / sigma_sq; - mu_stddev[p2] += w / sigma_sq; +#if USE_DB + for (unsigned i = 0; i < players.size(); ++i) { + for (unsigned j = 0; j < players.size(); ++j) { + CovarianceDBTuple tuple; + tuple.player1 = players[i]; + tuple.player2 = players[j]; + tuple.covariance = ih(i, j); + covariance_db_tuples.push_back(tuple); + } } - for (int i = 0; i < num_players; ++i) { - mu_stddev[i] = 1.0f / sqrt(mu_stddev[i]); +#else + for (unsigned i = 0; i < players.size(); ++i) { + for (unsigned j = 0; j < players.size(); ++j) { + printf("covariance %d %d %f\n", + players[i], + players[j], + ih(i, j)); + } } +#endif } -int main(int argc, char **argv) +void process_file(const char *filename) { + global_sigma = 70.0f; + prior_sigma = 70.0f; + matches_for_player.clear(); + all_matches.clear(); + + FILE *fp = fopen(filename, "r"); + if (fp == NULL) { + perror(filename); + exit(1); + } + + char locale[256]; + if (fscanf(fp, "%s", locale) != 1) { + fprintf(stderr, "Could't read number of players\n"); + exit(1); + } + int num_players; - if (scanf("%d", &num_players) != 1) { + if (fscanf(fp,"%d", &num_players) != 1) { fprintf(stderr, "Could't read number of players\n"); exit(1); } @@ -255,27 +303,27 @@ int main(int argc, char **argv) exit(1); } - vector players; - map player_map; + vector players; + map player_map; for (int i = 0; i < num_players; ++i) { char buf[256]; - if (scanf("%s", buf) != 1) { + if (fscanf(fp, "%s", buf) != 1) { fprintf(stderr, "Couldn't read player %d\n", i); exit(1); } - players.push_back(buf); - player_map[buf] = i; + players.push_back(atoi(buf)); + player_map[atoi(buf)] = i; } int num_matches = 0; for ( ;; ) { - char pl1[256], pl2[256]; + int pl1, pl2; int score1, score2; float weight; - if (scanf("%s %s %d %d %f", pl1, pl2, &score1, &score2, &weight) != 5) { + if (fscanf(fp, "%d %d %d %d %f", &pl1, &pl2, &score1, &score2, &weight) != 5) { //fprintf(stderr, "Read %d matches.\n", num_matches); break; } @@ -283,11 +331,11 @@ int main(int argc, char **argv) ++num_matches; if (player_map.count(pl1) == 0) { - fprintf(stderr, "Unknown player '%s'\n", pl1); + fprintf(stderr, "Unknown player '%d'\n", pl1); exit(1); } if (player_map.count(pl2) == 0) { - fprintf(stderr, "Unknown player '%s'\n", pl2); + fprintf(stderr, "Unknown player '%d'\n", pl2); exit(1); } @@ -307,6 +355,8 @@ int main(int argc, char **argv) all_matches.push_back(m1); } + + fclose(fp); float mu[MAX_PLAYERS]; @@ -314,6 +364,7 @@ int main(int argc, char **argv) mu[i] = PRIOR_MU; } + int num_iterations = -1; for (int j = 0; j < 1000; ++j) { float old_mu[MAX_PLAYERS]; float old_global_sigma = global_sigma; @@ -337,23 +388,105 @@ int main(int argc, char **argv) sumdiff += (global_sigma - old_global_sigma) * (global_sigma - old_global_sigma); if (sumdiff < EPSILON) { //fprintf(stderr, "Converged after %d iterations. Stopping.\n", j); - printf("%d 0 -1\n", j + 1); + num_iterations = j + 1; break; } } + construct_hessian(mu, num_players); + aux_params[make_pair(locale, "num_iterations")] = num_iterations; + aux_params[make_pair(locale, "score_stddev")] = global_sigma / sqrt(2.0f); + aux_params[make_pair(locale, "rating_prior_stddev")] = prior_sigma; + aux_params[make_pair(locale, "total_log_likelihood")] = compute_total_logl(mu, num_players); + + compute_mu_uncertainty(mu, players); + dump_scores(players, mu, mu_stddev, num_players); +} + +int main(int argc, char **argv) +{ +#if USE_DB + pqxx::connection conn("dbname=wloh host=127.0.0.1 user=wloh password=oto4iCh5"); +#endif + + for (int i = 1; i < argc; ++i) { + process_file(argv[i]); + } + #if DUMP_RAW dump_raw(mu, num_players); -#else - compute_mu_uncertainty(mu, num_players); - dump_scores(players, mu, mu_stddev, num_players); - //fprintf(stderr, "Optimal sigma: %f (two-player: %f)\n", sigma[0], sigma[0] * sqrt(2.0f)); - printf("%f 0 -2\n", global_sigma / sqrt(2.0f)); - printf("%f 0 -3\n", prior_sigma); + return 0; +#endif - float total_logl = compute_total_logl(mu, num_players); - printf("%f 0 -4\n", total_logl); +#if USE_DB + pqxx::work txn(conn); + txn.exec("SET client_min_messages TO WARNING"); -// construct_hessian(mu, sigma, num_players); + // Dump aux_params. + { + txn.exec("TRUNCATE aux_params"); + pqxx::tablewriter writer(txn, "aux_params"); + for (map, float>::const_iterator it = aux_params.begin(); it != aux_params.end(); ++it) { + char str[128]; + snprintf(str, 128, "%f", it->second); + + vector tuple; + tuple.push_back(it->first.first); // locale + tuple.push_back(it->first.second); // parameter name + tuple.push_back(str); + writer.push_back(tuple); + } + writer.complete(); + } + + // Dump ratings. + { + txn.exec("TRUNCATE ratings"); + pqxx::tablewriter writer(txn, "ratings"); + for (unsigned i = 0; i < rating_db_tuples.size(); ++i) { + char player_str[128], mu_str[128], mu_stddev_str[128]; + snprintf(player_str, 128, "%d", rating_db_tuples[i].player); + snprintf(mu_str, 128, "%f", rating_db_tuples[i].mu); + snprintf(mu_stddev_str, 128, "%f", rating_db_tuples[i].mu_stddev); + + vector tuple; + tuple.push_back(player_str); + tuple.push_back(mu_str); + tuple.push_back(mu_stddev_str); + writer.push_back(tuple); + } + writer.complete(); + } + + // Create a table new_covariance, and dump covariance into it. + { + txn.exec("CREATE TABLE new_covariance ( player1 smallint NOT NULL, player2 smallint NOT NULL, cov float NOT NULL )"); + pqxx::tablewriter writer(txn, "new_covariance"); + for (unsigned i = 0; i < covariance_db_tuples.size(); ++i) { + char player1_str[128], player2_str[128], cov_str[128]; + snprintf(player1_str, 128, "%d", covariance_db_tuples[i].player1); + snprintf(player2_str, 128, "%d", covariance_db_tuples[i].player2); + snprintf(cov_str, 128, "%f", covariance_db_tuples[i].covariance); + + vector tuple; + tuple.push_back(player1_str); + tuple.push_back(player2_str); + tuple.push_back(cov_str); + writer.push_back(tuple); + } + writer.complete(); + } + + // Create index, and rename new_covariance on top of covariance. + txn.exec("ALTER TABLE new_covariance ADD PRIMARY KEY ( player1, player2 );"); + txn.exec("DROP TABLE IF EXISTS covariance"); + txn.exec("ALTER TABLE new_covariance RENAME TO covariance"); +#else + //fprintf(stderr, "Optimal sigma: %f (two-player: %f)\n", sigma[0], sigma[0] * sqrt(2.0f)); + for (map, float>::const_iterator it = aux_params.begin(); it != aux_params.end(); ++it) { + printf("%s: aux_param %s %f\n", it->first.first.c_str(), it->first.second.c_str(), it->second); + } #endif + + txn.commit(); }