Switch to using Simpson's rule for the integration, and decouple the two step sizes

[foosball] / foorank.cpp
diff --git a/foorank.cpp b/foorank.cpp

index 659c6112877b439cadf3409afcf487db86691a69..b7d9011194e338df5aed675437f6885144a69e70 100644 (file)
--- a/foorank.cpp
+++ b/foorank.cpp
@@ -5,8 +5,12 @@
  #include <vector>
  #include <algorithm>
  
-// integration step size
-static const double step_size = 10.0;
+// step sizes
+static const double int_step_size = 50.0;
+static const double pdf_step_size = 10.0;
+
+// rating constant (see below)
+static const double rating_constant = 455.0;
  
  using namespace std;
  
@@ -32,7 +36,7 @@ double prodai(double a);
  //
  double prob_score(double a, double rd)
  {
-       return prob_score_real(a, prodai(a), rd/455.0);
+       return prob_score_real(a, prodai(a), rd/rating_constant);
  }
  
  // Same, but takes in Product(a+i, i=1..9) as an argument in addition to a. Faster
@@ -71,19 +75,34 @@ double prodai(double a)
  // Set the last parameter to 1.0 if player 1 won, or -1.0 if player 2 won.
  // In the latter case, ProbScore will be given (r1-r2) instead of (r2-r1).
  //
+static inline double evaluate_int_point(double a, double prodai_precompute, double r1, double mu2, double sigma2, double winfac, double x);
+
  double opponent_rating_pdf(double a, double r1, double mu2, double sigma2, double winfac)
  {
-       double sum = 0.0;
         double prodai_precompute = prodai(a);
-       winfac /= 455.0;
-       for (double r2 = 0.0; r2 < 3000.0; r2 += step_size) {
-               double x = r2 + step_size*0.5;
-               double probscore = prob_score_real(a, prodai_precompute, (r1 - x)*winfac);
-               double z = (x - mu2)/sigma2;
-               double gaussian = exp(-(z*z/2.0));
-               sum += step_size * probscore * gaussian;
+       winfac /= rating_constant;
+
+       int n = int(3000.0 / int_step_size + 0.5);
+       double h = 3000.0 / double(n);
+       double sum = evaluate_int_point(a, prodai_precompute, r1, mu2, sigma2, winfac, 0.0);
+
+       for (int i = 1; i < n; i += 2) {
+               sum += 4.0 * evaluate_int_point(a, prodai_precompute, r1, mu2, sigma2, winfac, i * h);
+       }
+       for (int i = 2; i < n; i += 2) {
+               sum += 2.0 * evaluate_int_point(a, prodai_precompute, r1, mu2, sigma2, winfac, i * h);
         }
-       return sum;
+       sum += evaluate_int_point(a, prodai_precompute, r1, mu2, sigma2, winfac, 3000.0);
+
+       return (h/3.0) * sum;
+}
+
+static inline double evaluate_int_point(double a, double prodai_precompute, double r1, double mu2, double sigma2, double winfac, double x)
+{
+       double probscore = prob_score_real(a, prodai_precompute, (r1 - x)*winfac);
+       double z = (x - mu2)/sigma2;
+       double gaussian = exp(-(z*z/2.0));
+       return  probscore * gaussian;
  }
  
  // normalize the curve so we know that A ~= 1
@@ -215,12 +234,12 @@ void solve3x3(double *A, double *x, double *B)
  }
  
  // Give an OK starting estimate for the least squares, by numerical integration
-// of x*f(x) and x^2 * f(x). Somehow seems to underestimate sigma, though.
+// of statistical moments.
  void estimate_musigma(vector<pair<double, double> > &curve, double &mu_result, double &sigma_result)
  {
-       double mu = 0.0;
-       double sigma = 0.0;
         double sum_area = 0.0;
+       double ex = 0.0;
+       double ex2 = 0.0;
  
         for (unsigned i = 1; i < curve.size(); ++i) {
                 double x1 = curve[i].first;
@@ -230,17 +249,24 @@ void estimate_musigma(vector<pair<double, double> > &curve, double &mu_result, d
                 double xm = 0.5 * (x0 + x1);
                 double ym = 0.5 * (y0 + y1);
                 sum_area += (x1-x0) * ym;
-               mu += (x1-x0) * xm * ym;
-               sigma += (x1-x0) * xm * xm * ym;
+               ex += (x1-x0) * xm * ym;
+               ex2 += (x1-x0) * xm * xm * ym;
         }
  
-       mu_result = mu / sum_area;
-       sigma_result = sqrt(sigma) / sum_area;
+       ex /= sum_area;
+       ex2 /= sum_area;
+
+       mu_result = ex;
+       sigma_result = sqrt(ex2 - ex * ex);
  }
         
  // Find best fit of the data in curves to a Gaussian pdf, based on the
  // given initial estimates. Works by nonlinear least squares, iterating
  // until we're below a certain threshold.
+//
+// Note that the algorithm blows up quite hard if the initial estimate is
+// not good enough. Use estimate_musigma to get a reasonable starting
+// estimate.
  void least_squares(vector<pair<double, double> > &curve, double mu1, double sigma1, double &mu_result, double &sigma_result)
  {
         double A = 1.0;
@@ -318,13 +344,13 @@ int main(int argc, char **argv)
         vector<pair<double, double> > curve;
  
         if (score1 == 10) {
-               for (double r1 = 0.0; r1 < 3000.0; r1 += step_size) {
+               for (double r1 = 0.0; r1 < 3000.0; r1 += pdf_step_size) {
                         double z = (r1 - mu1) / sigma1;
                         double gaussian = exp(-(z*z/2.0));
                         curve.push_back(make_pair(r1, gaussian * opponent_rating_pdf(score2, r1, mu2, sigma2, 1.0)));
                 }
         } else {
-               for (double r1 = 0.0; r1 < 3000.0; r1 += step_size) {
+               for (double r1 = 0.0; r1 < 3000.0; r1 += pdf_step_size) {
                         double z = (r1 - mu1) / sigma1;
                         double gaussian = exp(-(z*z/2.0));
                         curve.push_back(make_pair(r1, gaussian * opponent_rating_pdf(score1, r1, mu2, sigma2, -1.0)));