git.sesse.net Git - wloh/blob - bayeswf.cpp

   1 #include <stdio.h>
   2 #include <math.h>
   3 #include <string.h>
   4 #include <stdlib.h>
   5
   6 #include <map>
   7 #include <vector>
   8 #include <string>
   9 #include <algorithm>
  10
  11 using namespace std;
  12
  13 #define PRIOR_MU 1500
  14 #define PRIOR_WEIGHT 1.0
  15 #define MAX_PLAYERS 4096
  16 #define DUMP_RAW 0
  17
  18 float mu[MAX_PLAYERS];
  19 float sigma[MAX_PLAYERS];
  20 float prior_sigma = 70.0f;
  21
  22 #define EPSILON 1e-3
  23
  24 /*
  25  * L(mu_vec, sigma_vec, matches) = product[ L(mu_A, sigma_A, mu_B, sigma_B, score_AB - score_BA) ]
  26  * log-likelihood = sum[ log( L(mu_A, sigma_A, mu_B, sigma_B, score_AB - score_BA) ) ]
  27  *
  28  * L(mu1, sigma1, mu2, sigma2, score2 - score1) = sigmoid(mu2 - mu1, sqrt(sigma1² + sigma2²), (score2 - score1))
  29  *
  30  * pdf := 1/(sigma * sqrt(2*Pi)) * exp(-(x - mu)^2 / (2 * sigma^2));
  31  * pdfs := subs({ mu = mu1 - mu2, sigma = sqrt(sigma1^2 + sigma2^2) }, pdf);
  32  * diff(log(pdfs), mu1);
  33  */
  34
  35 struct match {
  36         int player, other_player;
  37         int margin;
  38         float weight;
  39 };
  40 map<int, vector<match> > matches_for_player;
  41 vector<match> all_matches;
  42
  43 void dump_scores(const vector<string> &players, const float *mu, const float *sigma, int num_players)
  44 {
  45 #if 0
  46         for (int i = 0; i < num_players; ++i) {
  47                 printf("%s=[%5.1f, %4.1f] ", players[i].c_str(), mu[i], sigma[i]);
  48         }
  49         printf("\n");
  50 #elif 0
  51         for (int i = 0; i < num_players; ++i) {
  52                 printf("%5.1f ", mu[i]);
  53         }
  54         printf("\n");
  55 #else
  56         for (int i = 0; i < num_players; ++i) {
  57                 printf("%f %s\n", mu[i], players[i].c_str());
  58         }
  59 #endif
  60 }
  61
  62 /*
  63  * diff(logL, mu1) = -w * (mu1 - mu2 - x) / sigma_c^2
  64  * maximizer for mu1 is given by: sum_i[ (w_i/sigma_c_i)^2 (mu1 - mu2_i - x_i) ] = 0
  65  *                                sum_i[ (w_i/sigma_c_i)^2 mu1 ] = sum_i [ (w_i/sigma_c_i)^2 ( mu2_i + x_i ) ]
  66  *                                mu1 = sum_i [ (w_i/sigma_c_i)^2 ( mu2_i + x_i ) ] / sum_i[ (w_i/sigma_c_i)^2 ]
  67  */
  68 void update_mu(float *mu, float *sigma, int player_num, const vector<match> &matches)
  69 {
  70         if (matches.empty()) {
  71                 return;
  72         }
  73
  74         float nom = 0.0f, denom = 0.0f;
  75
  76         // Prior.
  77         {
  78                 float inv_sigma2 = 1.0f / (prior_sigma * prior_sigma);
  79                 nom += PRIOR_WEIGHT * PRIOR_MU * inv_sigma2;
  80                 denom += PRIOR_WEIGHT * inv_sigma2;
  81         }
  82
  83         // All matches.
  84         for (unsigned i = 0; i < matches.size(); ++i) {
  85                 float sigma1 = sigma[player_num];
  86                 float sigma2 = sigma[matches[i].other_player];
  87                 float inv_sigma_c2 = matches[i].weight / (sigma1 * sigma1 + sigma2 * sigma2);
  88                 float x = matches[i].margin; // / 70.0f;
  89
  90                 nom += (mu[matches[i].other_player] + x) * inv_sigma_c2;
  91                 denom += inv_sigma_c2;
  92         }
  93         mu[player_num] = nom / denom;
  94 }
  95
  96 void dump_raw(const float *mu, const float *sigma, int num_players)
  97 {
  98         for (unsigned i = 0; i < all_matches.size(); ++i) {
  99                 const match& m = all_matches[i];
 100
 101                 float mu1 = mu[m.player];
 102                 float mu2 = mu[m.other_player];
 103                 float sigma1 = sigma[m.player];
 104                 float sigma2 = sigma[m.other_player];
 105                 float sigma = sqrt(sigma1 * sigma1 + sigma2 * sigma2);
 106                 float mu = mu1 - mu2;
 107                 float x = m.margin;
 108                 float w = m.weight;
 109
 110                 printf("%f %f\n", (x - mu) / sigma, w);
 111         }
 112 }
 113
 114 /*
 115  * diff(logL, sigma1) = sigma1 (-sigma1² - sigma2² + (x - mu)²) / sigma_c²
 116  * maximizer for sigma1 is given by: sum_i[ (1/sigma_c_i)² sigma1 ((x - mu)² - (sigma1² + sigma2²) ] = 0
 117  *                                   sum_i[ (x - mu)² - sigma1² - sigma2² ] = 0                                  |: sigma1 != 0, sigma2 != 0
 118  *                                   sum_i[ (x - mu)² - sigma2² ] = sum[ sigma1² ]
 119  *                                   sigma1 = sqrt( sum_i[ (x - mu)² - sigma2² ] / N )
 120  */
 121 void update_sigma(float *mu, float *sigma, int player_num, const vector<match> &matches)
 122 {
 123         if (matches.size() < 2) {
 124                 return;
 125         }
 126
 127         float sum = 0.0f;
 128         for (unsigned i = 0; i < matches.size(); ++i) {
 129                 float mu1 = mu[player_num];
 130                 float mu2 = mu[matches[i].other_player];
 131                 float mu = mu1 - mu2;
 132                 float sigma2 = sigma[matches[i].other_player];
 133                 float x = matches[i].margin;
 134
 135                 //fprintf(stderr, "x=%f mu=%f sigma2=%f   add %f-%f = %f\n", x, mu, sigma2, (x-mu)*(x-mu), sigma2*sigma2, (x - mu) * (x - mu) - sigma2 * sigma2);
 136                 sum += (x - mu) * (x - mu) - sigma2 * sigma2;
 137         }
 138
 139         if (sum <= 0) {
 140                 return;
 141         }
 142         //fprintf(stderr, "sum=%f\n", sum);
 143         sigma[player_num] = sqrt(sum / matches.size());
 144 }
 145
 146 /*
 147  * diff(logL, sigma) = w ( (x - mu)² - sigma² ) / sigma³
 148  * maximizer for sigma is given by: sum_i[ (w_i/sigma)³ ((x - mu)² - sigma²) ] = 0
 149  *                                   sum_i[ w_i ( (x - mu)² - sigma² ) ] = 0                            |: sigma != 0
 150  *                                   sum_i[ w_i (x - mu)² ] = sum[ w_i sigma² ]
 151  *                                   sigma = sqrt( sum_i[ w_i (x - mu)² ] / sum[w_i] )
 152  */
 153 void update_global_sigma(float *mu, float *sigma, int num_players)
 154 {
 155         float nom = 0.0f, denom = 0.0f;
 156         for (unsigned i = 0; i < all_matches.size(); ++i) {
 157                 const match& m = all_matches[i];
 158
 159                 float mu1 = mu[m.player];
 160                 float mu2 = mu[m.other_player];
 161                 float mu = mu1 - mu2;
 162                 float x = m.margin;
 163                 float w = m.weight;
 164
 165                 nom += w * ((x - mu) * (x - mu));
 166                 denom += w;
 167         }
 168
 169         float best_sigma = sqrt(nom / denom) / sqrt(2.0f);  // Divide evenly between the two players.
 170         for (int i = 0; i < num_players; ++i) {
 171                 sigma[i] = best_sigma;
 172         }
 173 }
 174
 175 /*
 176  * diff(priorlogL, sigma) = w ( (x - mu)² - sigma² ) / sigma³
 177  * maximizer for sigma is given by: sum_i[ (w_i/sigma)³ ((x - mu)² - sigma²) ] = 0
 178  *                                   sum_i[ w_i ( (x - mu)² - sigma² ) ] = 0                            |: sigma != 0
 179  *                                   sum_i[ w_i (x - mu)² ] = sum[ w_i sigma² ]
 180  *                                   sigma = sqrt( sum_i[ w_i (x - mu)² ] / sum[w_i] )
 181  */
 182 void update_prior_sigma(float *mu, float *sigma, int num_players)
 183 {
 184         float nom = 0.0f, denom = 0.0f;
 185         for (int i = 0; i < num_players; ++i) {
 186                 float mu1 = mu[i];
 187
 188                 nom += ((mu1 - PRIOR_MU) * (mu1 - PRIOR_MU));
 189                 denom += 1.0f;
 190         }
 191
 192         prior_sigma = sqrt(nom / denom);
 193         if (!(prior_sigma > 40.0f)) {
 194                 prior_sigma = 40.0f;
 195         }
 196 }
 197
 198 float compute_logl(float z)
 199 {
 200         return -0.5 * (log(2.0f / M_PI) + z * z);
 201 }
 202
 203 float compute_total_logl(float *mu, float *sigma, int num_players)
 204 {
 205         float total_logl = 0.0f;
 206
 207         // Prior.
 208         for (int i = 0; i < num_players; ++i) {
 209                 total_logl += PRIOR_WEIGHT * compute_logl((mu[i] - PRIOR_MU) / prior_sigma);
 210         }
 211
 212         // Matches.
 213         for (unsigned i = 0; i < all_matches.size(); ++i) {
 214                 const match& m = all_matches[i];
 215
 216                 float mu1 = mu[m.player];
 217                 float mu2 = mu[m.other_player];
 218                 float sigma1 = sigma[m.player];
 219                 float sigma2 = sigma[m.other_player];
 220                 float sigma = sqrt(sigma1 * sigma1 + sigma2 * sigma2);
 221                 float mu = mu1 - mu2;
 222                 float x = m.margin;
 223                 float w = m.weight;
 224
 225                 total_logl += w * compute_logl((x - mu) / sigma);
 226         }
 227
 228         return total_logl;
 229 }
 230
 231 /*
 232  * Compute Hessian matrix of the negative log-likelihood, ie. for each term in logL:
 233  *
 234  * M_ij = D_i D_j (- logL) = -w / sigma²                                for i != j
 235  *                            w / sigma²                                for i == j
 236  *
 237  * Note that this does not depend on mu or the margin at all.
 238  */
 239 double hessian[MAX_PLAYERS][MAX_PLAYERS];
 240 void construct_hessian(const float *mu, const float *sigma, int num_players)
 241 {
 242         memset(hessian, 0, sizeof(hessian));
 243
 244         for (unsigned i = 0; i < all_matches.size(); ++i) {
 245                 const match &m = all_matches[i];
 246
 247                 int p1 = m.player;
 248                 int p2 = m.other_player;
 249
 250                 double sigma1 = sigma[m.player];
 251                 double sigma2 = sigma[m.other_player];
 252
 253                 double sigma_sq = sigma1 * sigma1 + sigma2 * sigma2;
 254                 float w = m.weight;
 255
 256                 hessian[p1][p2] -= w / sigma_sq;
 257                 hessian[p2][p1] -= w / sigma_sq;
 258
 259                 hessian[p1][p1] += w / sigma_sq;
 260                 hessian[p2][p2] += w / sigma_sq;
 261         }
 262
 263         for (int i = 0; i < num_players; ++i) {
 264                 for (int j = 0; j < num_players; ++j) {
 265                         printf("%.12f ", hessian[i][j]);
 266                 }
 267                 printf("\n");
 268         }
 269 }
 270
 271 int main(int argc, char **argv)
 272 {
 273         int num_players;
 274         if (scanf("%d", &num_players) != 1) {
 275                 fprintf(stderr, "Could't read number of players\n");
 276                 exit(1);
 277         }
 278
 279         if (num_players > MAX_PLAYERS) {
 280                 fprintf(stderr, "Max %d players supported\n", MAX_PLAYERS);
 281                 exit(1);
 282         }
 283
 284         vector<string> players;
 285         map<string, int> player_map;
 286
 287         for (int i = 0; i < num_players; ++i) {
 288                 char buf[256];
 289                 if (scanf("%s", buf) != 1) {
 290                         fprintf(stderr, "Couldn't read player %d\n", i);
 291                         exit(1);
 292                 }
 293
 294                 players.push_back(buf);
 295                 player_map[buf] = i;
 296         }
 297
 298         int num_matches = 0;
 299         for ( ;; ) {
 300                 char pl1[256], pl2[256];
 301                 int score1, score2;
 302                 float weight;
 303
 304                 if (scanf("%s %s %d %d %f", pl1, pl2, &score1, &score2, &weight) != 5) {
 305                         //fprintf(stderr, "Read %d matches.\n", num_matches);
 306                         break;
 307                 }
 308
 309                 ++num_matches;
 310
 311                 if (player_map.count(pl1) == 0) {
 312                         fprintf(stderr, "Unknown player '%s'\n", pl1);
 313                         exit(1);
 314                 }
 315                 if (player_map.count(pl2) == 0) {
 316                         fprintf(stderr, "Unknown player '%s'\n", pl2);
 317                         exit(1);
 318                 }
 319
 320                 match m1;
 321                 m1.player = player_map[pl1];
 322                 m1.other_player = player_map[pl2];
 323                 m1.margin = score1 - score2;
 324                 m1.weight = weight;
 325                 matches_for_player[player_map[pl1]].push_back(m1);
 326
 327                 match m2;
 328                 m2.player = player_map[pl2];
 329                 m2.other_player = player_map[pl1];
 330                 m2.margin = score2 - score1;
 331                 m2.weight = weight;
 332                 matches_for_player[player_map[pl2]].push_back(m2);
 333
 334                 all_matches.push_back(m1);
 335         }
 336
 337         float mu[MAX_PLAYERS];
 338         float sigma[MAX_PLAYERS];
 339
 340         for (int i = 0; i < num_players; ++i) {
 341                 mu[i] = PRIOR_MU;
 342                 sigma[i] = 70.0f / sqrt(2.0f);
 343         }
 344
 345         for (int j = 0; j < 1000; ++j) {
 346                 float old_mu[MAX_PLAYERS];
 347                 float old_sigma[MAX_PLAYERS];
 348                 float old_prior_sigma = prior_sigma;
 349                 memcpy(old_mu, mu, sizeof(mu));
 350                 memcpy(old_sigma, sigma, sizeof(sigma));
 351                 for (int i = 0; i < num_players; ++i) {
 352                         update_mu(mu, sigma, i, matches_for_player[i]);
 353                 }
 354                 update_global_sigma(mu, sigma, num_players);
 355                 update_prior_sigma(mu, sigma, num_players);
 356                 /* for (int i = 0; i < num_players; ++i) {
 357                         update_sigma(mu, sigma, i, matches_for_player[i]);
 358                         dump_scores(players, mu, sigma, num_players);
 359                 } */
 360
 361                 float sumdiff = 0.0f;
 362                 for (int i = 0; i < num_players; ++i) {
 363                         sumdiff += (mu[i] - old_mu[i]) * (mu[i] - old_mu[i]);
 364                         sumdiff += (sigma[i] - old_sigma[i]) * (sigma[i] - old_sigma[i]);
 365                 }
 366                 sumdiff += (prior_sigma - old_prior_sigma) * (prior_sigma - old_prior_sigma);
 367                 if (sumdiff < EPSILON) {
 368                         //fprintf(stderr, "Converged after %d iterations. Stopping.\n", j);
 369                         printf("%d -1\n", j + 1);
 370                         break;
 371                 }
 372         }
 373
 374 #if DUMP_RAW
 375         dump_raw(mu, sigma, num_players);
 376 #else
 377         dump_scores(players, mu, sigma, num_players);
 378         //fprintf(stderr, "Optimal sigma: %f (two-player: %f)\n", sigma[0], sigma[0] * sqrt(2.0f));
 379         printf("%f -2\n", sigma[0]);
 380         printf("%f -3\n", prior_sigma);
 381
 382         float total_logl = compute_total_logl(mu, sigma, num_players);
 383         printf("%f -4\n", total_logl);
 384
 385 //      construct_hessian(mu, sigma, num_players);
 386 #endif
 387 }