Trigger on down-flanks instead of up-flanks.
[c64tapwav] / sync.cpp
1 // A program that tries to sync up the two channels, using Viterbi decoding
2 // to find the most likely misalignment as it changes throughout the file.
3
4 #define NUM_THEORIES 200
5 #define THEORY_FROM -10.0  /* in samples */
6 #define THEORY_TO 10.0  /* in samples */
7 #define SWITCH_COST 1000.0  /* pretty arbitrary */
8
9 #include <stdio.h>
10 #include <math.h>
11 #include <vector>
12 #include <algorithm>
13
14 #include "interpolate.h"
15
16 #define BUFSIZE 4096
17
18 struct stereo_sample {
19         short left, right;
20 };
21 struct double_stereo_sample {
22         double left, right;
23 };
24
25 inline short clip(int x)
26 {
27         if (x < -32768) {
28                 return x;
29         } else if (x > 32767) {
30                 return 32767;
31         } else {
32                 return short(x);
33         }
34 }
35
36 struct hypothesis {
37         int id;
38         double cost;
39         hypothesis *prev;
40 };
41
42 int main(int argc, char **argv)
43 {
44         std::vector<stereo_sample> pcm;
45
46         while (!feof(stdin)) {
47                 stereo_sample buf[BUFSIZE];
48                 ssize_t ret = fread(buf, sizeof(stereo_sample), BUFSIZE, stdin);
49                 if (ret >= 0) {
50                         pcm.insert(pcm.end(), buf, buf + ret);
51                 }
52         }
53
54         double sum_left = 0.0, sum_right = 0.0;
55         for (unsigned i = 0; i < pcm.size(); ++i) {
56                 sum_left += pcm[i].left;
57                 sum_right += pcm[i].right;
58         }
59         double mean_left = sum_left / pcm.size();
60         double mean_right = sum_right / pcm.size();
61
62         //fprintf(stderr, "Mean: L=%f R=%f\n", mean_left, mean_right);
63
64         double sum2_left = 0.0, sum2_right = 0.0;
65         for (unsigned i = 0; i < pcm.size(); ++i) {
66                 sum2_left += (pcm[i].left - mean_left) * (pcm[i].left - mean_left);
67                 sum2_right += (pcm[i].right - mean_right) * (pcm[i].right - mean_right);
68         }
69         double var_left = sum2_left / (pcm.size() - 1);
70         double var_right = sum2_right / (pcm.size() - 1);
71
72         //fprintf(stderr, "Stddev: L=%f R=%f\n", sqrt(var_left), sqrt(var_right));
73
74         double inv_sd_left = 1.0 / sqrt(var_left);
75         double inv_sd_right = 1.0 / sqrt(var_right);
76
77         std::vector<double_stereo_sample> norm;
78         norm.resize(pcm.size());
79
80         for (unsigned i = 0; i < pcm.size(); ++i) {
81                 norm[i].left = (pcm[i].left - mean_left) * inv_sd_left;
82                 norm[i].right = (pcm[i].right - mean_right) * inv_sd_right;
83         }
84
85 #if 0
86         double offset = 0.0;
87         double old_diff = 0.0;
88         for (unsigned i = 0; i < pcm.size(); ++i) {
89                 double left = (pcm[i].left - mean_left) * inv_sd_left;
90                 double right = (interpolate(pcm, i + offset) - mean_right) * inv_sd_right;
91
92                 double diff = right - left;
93                 old_diff = old_diff * 0.9999 + diff * 0.0001;
94                 offset -= 0.1 * old_diff;
95
96                 if (i % 100 == 0) {
97                         fprintf(stderr, "%7.3f: %7.3f [diff=%8.3f lagged diff=%8.3f]\n", i / 44100.0, offset, diff, old_diff);
98                 }
99                 printf("%f %f %f\n", i / 44100.0, left, right);
100         }
101 #endif
102
103         double delays[NUM_THEORIES];
104         hypothesis *bases = new hypothesis[NUM_THEORIES];
105
106         for (int h = 0; h < NUM_THEORIES; ++h) {
107                 delays[h] = THEORY_FROM + h * (THEORY_TO - THEORY_FROM) / (NUM_THEORIES - 1);
108                 bases[h].id = h;
109                 bases[h].cost = 0.0;
110                 bases[h].prev = NULL;
111         }
112
113         fprintf(stderr, "Matching blocks... %7.2f", 0.0);
114         hypothesis *prev_hyp = bases;
115         size_t total_end = pcm.size();
116         //size_t total_end = 4410000;
117         for (unsigned i = 0; i < total_end; i += BUFSIZE) {
118                 fprintf(stderr, "\b\b\b\b\b\b\b%7.2f", i / 44100.0);
119                 size_t end = std::min<size_t>(i + BUFSIZE, total_end);
120         
121                 hypothesis *hyp = new hypothesis[NUM_THEORIES];
122
123                 // evaluate all hypotheses
124                 for (int h = 0; h < NUM_THEORIES; ++h) {
125                         double sum = 0.0;
126                         double d = delays[h];
127                         for (unsigned s = i; s < end; ++s) {
128                                 double left = norm[s].left;
129                                 double right = linear_interpolate_right(norm, s + d);
130                                 double diff = (right - left) * (right - left);
131                                 sum += diff;
132                         }
133
134                         double best_cost = HUGE_VAL;
135                         hypothesis *best_prev = NULL;
136                         for (int hp = 0; hp < NUM_THEORIES; ++hp) {
137                                 double switch_cost = SWITCH_COST * fabs(delays[hp] - delays[h]);
138                                 double cost = prev_hyp[hp].cost + sum + switch_cost;
139                                 if (best_prev == NULL || cost < best_cost) {
140                                         best_cost = cost;
141                                         best_prev = &prev_hyp[hp];
142                                 }
143                         }
144
145                         hyp[h].id = h;
146                         hyp[h].cost = best_cost;
147                         hyp[h].prev = best_prev;
148                 }
149
150                 prev_hyp = hyp;
151         }
152         fprintf(stderr, "\b\b\b\b\b\b\b%7.2f\n", total_end / 44100.0);
153
154         // best winner
155         double best_cost = HUGE_VAL;
156         hypothesis *best_hyp = NULL;
157         for (int h = 0; h < NUM_THEORIES; ++h) {
158                 if (best_hyp == NULL || prev_hyp[h].cost < best_cost) {
159                         best_cost = prev_hyp[h].cost;
160                         best_hyp = &prev_hyp[h];
161                 }
162         }
163
164         // trace the path backwards
165         std::vector<double> best_path;
166         while (best_hyp != NULL) {
167                 best_path.push_back(delays[best_hyp->id]);
168                 best_hyp = best_hyp->prev;
169         }
170
171         reverse(best_path.begin(), best_path.end());
172
173         fprintf(stderr, "Writing misalignment graphs to misalignment.plot...\n");
174         FILE *fp = fopen("misalignment.plot", "w");
175         for (unsigned i = 0; i < best_path.size(); ++i) {
176                 fprintf(fp, "%f %f\n", i * BUFSIZE / 44100.0, best_path[i]);
177         }
178         fclose(fp);
179
180         fprintf(stderr, "Stretching right channel to match left... %7.2f%%", 0.0);
181         double inv_sd = sqrt(2.0) / sqrt(var_left + var_right);
182         std::vector<stereo_sample> aligned_pcm;
183         std::vector<short> mono_pcm;
184         for (unsigned i = 0; i < total_end; ++i) {
185                 double d = linear_interpolate(best_path, i / double(BUFSIZE));
186                 int left = pcm[i].left;
187                 int right = linear_interpolate_right(pcm, i + d);
188
189                 stereo_sample ss;
190                 ss.left = left;
191                 ss.right = clip(right);
192                 aligned_pcm.push_back(ss);
193
194                 mono_pcm.push_back(clip(lrintf(inv_sd * 4096.0 * (left + right))));
195
196                 if (i % 4096 == 0) {
197                         fprintf(stderr, "\b\b\b\b\b\b\b\b%7.2f%%", 100.0 * i / total_end);
198                 }
199         }
200         fprintf(stderr, "\b\b\b\b\b\b\b%7.2f%%\n", 100.0);
201
202         fprintf(stderr, "Writing realigned stereo channels to aligned.raw...\n");
203         fp = fopen("aligned.raw", "wb");
204         fwrite(aligned_pcm.data(), sizeof(stereo_sample) * aligned_pcm.size(), 1, fp);
205         fclose(fp);
206
207         fprintf(stderr, "Writing combined mono track to combined.raw...\n");
208         fp = fopen("combined.raw", "wb");
209         fwrite(mono_pcm.data(), sizeof(short) * mono_pcm.size(), 1, fp);
210         fclose(fp);
211
212         fprintf(stderr, "All done.\n");
213
214 #if 0
215
216         for (int sec = 0; sec < 2400; ++sec) {
217                 double from_offset = -128.0;
218                 double to_offset = 128.0;
219                 double best_offset = HUGE_VAL;
220                 for (int iter = 0; iter < 5; ++iter) {  
221                         //printf("from %f to %f\n", from_offset, to_offset);
222                         double best_sum = HUGE_VAL;
223                 
224                         for (int k = 0; k < 100; ++k) {
225                                 double offset = from_offset + k * 0.01 * (to_offset - from_offset);
226                                 double sum = 0.0;
227                                 for (unsigned i = sec * 4410; i < sec * 4410 + 4410; ++i) {
228                                 //for (unsigned i = 100000; i < 10NUM_THEORIES0; ++i) {
229                                         double left = norm[i].left;
230                                         double right = interpolate(norm, i + offset);
231                                         //double right = (norm[i + offset].right - mean_right) * inv_sd_right;
232                                         double diff = (right - left) * (right - left);
233                                         sum += diff;
234                                 }
235                 
236                                 if (sum < best_sum) {
237                                         best_sum = sum;
238                                         best_offset = offset;
239                                 }
240
241                                 //printf("%f %f\n", offset, sqrt(sum / NUM_THEORIES00.0f));
242                                 //fflush(stdout);
243                         }
244                 
245                         double range = 0.1 * (to_offset - from_offset);
246                         from_offset = best_offset - range;
247                         to_offset = best_offset + range;
248                 }
249                 printf("%d %f\n", sec, best_offset);
250                 fflush(stdout);
251         }
252 #endif
253         
254
255 }