- double total_d = 0.0;
- for (unsigned i = 0; i < 64; ++i) {
- int best_assignment = -1;
- double best_distance = HUGE_VAL;
- for (unsigned j = 0; j < NUM_CLUSTERS; ++j) {
- double d = 0.0;
- for (unsigned k = 0; k < NUM_SYMS; ++k) {
- double p1 = (clusters[j].freqs[k] + 0.5) * inv_sum_clusters[j];
- double p2 = (stats[i + base].freqs[k] + 0.5) * inv_sum_coeffs[i];
-
- // K-L divergence is asymmetric; this is a hack.
- d += p1 * log(p1 / p2);
- d += p2 * log(p2 / p1);
- }
- if (d < best_distance) {
- best_assignment = j;
- best_distance = d;
+ for (int m = 0; m < NUM_CLUSTERS; ++m) {
+ for (int o = 0; o < 64; ++o) {
+ if (is_medoid[o]) continue;
+ int old_medoid = medoids[m];
+ medoids[m] = o;
+
+ int new_assignment[64];
+ double candidate_score = find_best_assignment(medoids, new_assignment);
+
+ if (candidate_score < current_score) {
+ current_score = candidate_score;
+ memcpy(assignment, new_assignment, sizeof(assignment));
+
+ is_medoid[old_medoid] = false;
+ is_medoid[medoids[m]] = true;
+ printf("%f: ", current_score);
+ for (int i = 0; i < 64; ++i) {
+ printf("%d ", assignment[i]);
+ }
+ printf("\n");
+ any_changed = true;
+ } else {
+ medoids[m] = old_medoid;