Add a program to sync up two stereo channels.
[c64tapwav] / sync.cpp
1 // A program that tries to sync up the two channels, using Viterbi decoding
2 // to find the most likely misalignment as it changes throughout the file.
3
4 #define NUM_THEORIES 200
5 #define THEORY_FROM -10.0  /* in samples */
6 #define THEORY_TO 10.0  /* in samples */
7 #define SWITCH_COST 1000.0  /* pretty arbitrary */
8
9 #include <stdio.h>
10 #include <math.h>
11 #include <vector>
12 #include <algorithm>
13
14 #define LANCZOS_RADIUS 30
15 #define BUFSIZE 4096
16
17 struct stereo_sample {
18         short left, right;
19 };
20 struct double_stereo_sample {
21         double left, right;
22 };
23
24 inline short clip(int x)
25 {
26         if (x < -32768) {
27                 return x;
28         } else if (x > 32767) {
29                 return 32767;
30         } else {
31                 return short(x);
32         }
33 }
34
35 double sinc(double x)
36 {
37         if (fabs(x) < 1e-6) {
38                 return 1.0f - fabs(x);
39         } else {
40                 return sin(x) / x;
41         }
42 }
43
44 #if 0
45 double weight(double x)
46 {
47         if (fabs(x) > LANCZOS_RADIUS) {
48                 return 0.0f;
49         }
50         return sinc(M_PI * x) * sinc(M_PI * x / LANCZOS_RADIUS);
51 }
52
53 double interpolate(const std::vector<double_stereo_sample> &pcm, double i)
54 {
55         int lower = std::max<int>(ceil(i - LANCZOS_RADIUS), 0);
56         int upper = std::min<int>(floor(i + LANCZOS_RADIUS), pcm.size() - 1);
57         double sum = 0.0f;
58
59         for (int x = lower; x <= upper; ++x) {
60                 sum += pcm[x].right * weight(i - x);
61         }
62         return sum;
63 }
64 #else
65 double weight(double x)
66 {
67         if (fabs(x) > 1.0f) {
68                 return 0.0f;
69         }
70         return 1.0f - fabs(x);
71 }
72
73 inline double interpolate(const std::vector<double> &pcm, double i)
74 {
75         int ii = int(i);
76         if (ii < 0 || ii >= int(pcm.size() - 1)) {
77                 return 0.0;
78         }
79         double frac = i - ii;
80
81         return pcm[ii] + frac * (pcm[ii + 1] - pcm[ii]);
82 }
83
84 template<class T>
85 inline double interpolate(const std::vector<T> &pcm, double i)
86 {
87         int ii = int(i);
88         if (ii < 0 || ii >= int(pcm.size() - 1)) {
89                 return 0.0;
90         }
91         double frac = i - ii;
92
93         return pcm[ii].right + frac * (pcm[ii + 1].right - pcm[ii].right);
94 }
95 #endif
96
97 struct hypothesis {
98         int id;
99         double cost;
100         hypothesis *prev;
101 };
102
103 int main(int argc, char **argv)
104 {
105         std::vector<stereo_sample> pcm;
106
107         while (!feof(stdin)) {
108                 stereo_sample buf[BUFSIZE];
109                 ssize_t ret = fread(buf, sizeof(stereo_sample), BUFSIZE, stdin);
110                 if (ret >= 0) {
111                         pcm.insert(pcm.end(), buf, buf + ret);
112                 }
113         }
114
115         double sum_left = 0.0, sum_right = 0.0;
116         for (unsigned i = 0; i < pcm.size(); ++i) {
117                 sum_left += pcm[i].left;
118                 sum_right += pcm[i].right;
119         }
120         double mean_left = sum_left / pcm.size();
121         double mean_right = sum_right / pcm.size();
122
123         //fprintf(stderr, "Mean: L=%f R=%f\n", mean_left, mean_right);
124
125         double sum2_left = 0.0, sum2_right = 0.0;
126         for (unsigned i = 0; i < pcm.size(); ++i) {
127                 sum2_left += (pcm[i].left - mean_left) * (pcm[i].left - mean_left);
128                 sum2_right += (pcm[i].right - mean_right) * (pcm[i].right - mean_right);
129         }
130         double var_left = sum2_left / (pcm.size() - 1);
131         double var_right = sum2_right / (pcm.size() - 1);
132
133         //fprintf(stderr, "Stddev: L=%f R=%f\n", sqrt(var_left), sqrt(var_right));
134
135         double inv_sd_left = 1.0 / sqrt(var_left);
136         double inv_sd_right = 1.0 / sqrt(var_right);
137
138         std::vector<double_stereo_sample> norm;
139         norm.resize(pcm.size());
140
141         for (unsigned i = 0; i < pcm.size(); ++i) {
142                 norm[i].left = (pcm[i].left - mean_left) * inv_sd_left;
143                 norm[i].right = (pcm[i].right - mean_right) * inv_sd_right;
144         }
145
146 #if 0
147         double offset = 0.0;
148         double old_diff = 0.0;
149         for (unsigned i = 0; i < pcm.size(); ++i) {
150                 double left = (pcm[i].left - mean_left) * inv_sd_left;
151                 double right = (interpolate(pcm, i + offset) - mean_right) * inv_sd_right;
152
153                 double diff = right - left;
154                 old_diff = old_diff * 0.9999 + diff * 0.0001;
155                 offset -= 0.1 * old_diff;
156
157                 if (i % 100 == 0) {
158                         fprintf(stderr, "%7.3f: %7.3f [diff=%8.3f lagged diff=%8.3f]\n", i / 44100.0, offset, diff, old_diff);
159                 }
160                 printf("%f %f %f\n", i / 44100.0, left, right);
161         }
162 #endif
163
164         double delays[NUM_THEORIES];
165         hypothesis *bases = new hypothesis[NUM_THEORIES];
166
167         for (int h = 0; h < NUM_THEORIES; ++h) {
168                 delays[h] = THEORY_FROM + h * (THEORY_TO - THEORY_FROM) / (NUM_THEORIES - 1);
169                 bases[h].id = h;
170                 bases[h].cost = 0.0;
171                 bases[h].prev = NULL;
172         }
173
174         hypothesis *prev_hyp = bases;
175         size_t total_end = pcm.size();
176         //size_t total_end = 4410000;
177         for (unsigned i = 0; i < total_end; i += BUFSIZE) {
178                 fprintf(stderr, "%.3f\n", i / 44100.0);
179                 size_t end = std::min<size_t>(i + BUFSIZE, total_end);
180         
181                 hypothesis *hyp = new hypothesis[NUM_THEORIES];
182
183                 // evaluate all hypotheses
184                 for (int h = 0; h < NUM_THEORIES; ++h) {
185                         double sum = 0.0;
186                         double d = delays[h];
187                         for (unsigned s = i; s < end; ++s) {
188                                 double left = norm[s].left;
189                                 double right = interpolate(norm, s + d);
190                                 double diff = (right - left) * (right - left);
191                                 sum += diff;
192                         }
193
194                         double best_cost = HUGE_VAL;
195                         hypothesis *best_prev = NULL;
196                         for (int hp = 0; hp < NUM_THEORIES; ++hp) {
197                                 double switch_cost = SWITCH_COST * fabs(delays[hp] - delays[h]);
198                                 double cost = prev_hyp[hp].cost + sum + switch_cost;
199                                 if (best_prev == NULL || cost < best_cost) {
200                                         best_cost = cost;
201                                         best_prev = &prev_hyp[hp];
202                                 }
203                         }
204
205                         hyp[h].id = h;
206                         hyp[h].cost = best_cost;
207                         hyp[h].prev = best_prev;
208                 }
209
210                 prev_hyp = hyp;
211         }
212
213         // best winner
214         double best_cost = HUGE_VAL;
215         hypothesis *best_hyp = NULL;
216         for (int h = 0; h < NUM_THEORIES; ++h) {
217                 if (best_hyp == NULL || prev_hyp[h].cost < best_cost) {
218                         best_cost = prev_hyp[h].cost;
219                         best_hyp = &prev_hyp[h];
220                 }
221         }
222
223         // trace the path backwards
224         std::vector<double> best_path;
225         while (best_hyp != NULL) {
226                 best_path.push_back(delays[best_hyp->id]);
227                 best_hyp = best_hyp->prev;
228         }
229
230         reverse(best_path.begin(), best_path.end());
231
232         fprintf(stderr, "Writing misalignment graphs to misalignment.plot...\n");
233         FILE *fp = fopen("misalignment.plot", "w");
234         for (unsigned i = 0; i < best_path.size(); ++i) {
235                 fprintf(fp, "%f %f\n", i * BUFSIZE / 44100.0, best_path[i]);
236         }
237         fclose(fp);
238
239         // readjust and write out
240         double inv_sd = sqrt(2.0) / sqrt(var_left + var_right);
241         std::vector<stereo_sample> aligned_pcm;
242         std::vector<short> mono_pcm;
243         for (unsigned i = 0; i < total_end; ++i) {
244                 double d = interpolate(best_path, i / double(BUFSIZE));
245                 int left = pcm[i].left;
246                 int right = interpolate(pcm, i + d);
247
248                 stereo_sample ss;
249                 ss.left = left;
250                 ss.right = clip(right);
251                 aligned_pcm.push_back(ss);
252
253                 mono_pcm.push_back(clip(lrintf(inv_sd * 4096.0 * (left + right))));
254         }
255
256         fprintf(stderr, "Writing realigned stereo channels to aligned.raw...\n");
257         fp = fopen("aligned.raw", "wb");
258         fwrite(aligned_pcm.data(), sizeof(stereo_sample) * aligned_pcm.size(), 1, fp);
259         fclose(fp);
260
261         fprintf(stderr, "Writing combined mono track to combined.raw...\n");
262         fp = fopen("combined.raw", "wb");
263         fwrite(mono_pcm.data(), sizeof(short) * mono_pcm.size(), 1, fp);
264         fclose(fp);
265
266         fprintf(stderr, "All done.\n");
267
268 #if 0
269
270         for (int sec = 0; sec < 2400; ++sec) {
271                 double from_offset = -128.0;
272                 double to_offset = 128.0;
273                 double best_offset = HUGE_VAL;
274                 for (int iter = 0; iter < 5; ++iter) {  
275                         //printf("from %f to %f\n", from_offset, to_offset);
276                         double best_sum = HUGE_VAL;
277                 
278                         for (int k = 0; k < 100; ++k) {
279                                 double offset = from_offset + k * 0.01 * (to_offset - from_offset);
280                                 double sum = 0.0;
281                                 for (unsigned i = sec * 4410; i < sec * 4410 + 4410; ++i) {
282                                 //for (unsigned i = 100000; i < 10NUM_THEORIES0; ++i) {
283                                         double left = norm[i].left;
284                                         double right = interpolate(norm, i + offset);
285                                         //double right = (norm[i + offset].right - mean_right) * inv_sd_right;
286                                         double diff = (right - left) * (right - left);
287                                         sum += diff;
288                                 }
289                 
290                                 if (sum < best_sum) {
291                                         best_sum = sum;
292                                         best_offset = offset;
293                                 }
294
295                                 //printf("%f %f\n", offset, sqrt(sum / NUM_THEORIES00.0f));
296                                 //fflush(stdout);
297                         }
298                 
299                         double range = 0.1 * (to_offset - from_offset);
300                         from_offset = best_offset - range;
301                         to_offset = best_offset + range;
302                 }
303                 printf("%d %f\n", sec, best_offset);
304                 fflush(stdout);
305         }
306 #endif
307         
308
309 }