Specify levels in terms of 0..1 and not 0..32768.
[c64tapwav] / sync.cpp
1 // A program that tries to sync up the two channels, using Viterbi decoding
2 // to find the most likely misalignment as it changes throughout the file.
3
4 #define NUM_THEORIES 200
5 #define THEORY_FROM -20.0  /* in samples */
6 #define THEORY_TO 20.0  /* in samples */
7 #define SWITCH_COST 1000.0  /* pretty arbitrary */
8
9 #include <stdio.h>
10 #include <math.h>
11 #include <vector>
12 #include <algorithm>
13
14 #include "interpolate.h"
15
16 #define BUFSIZE 4096
17
18 struct stereo_sample {
19         short left, right;
20 };
21 struct float_stereo_sample {
22         float left, right;
23 };
24
25 inline short clip(int x)
26 {
27         if (x < -32768) {
28                 return x;
29         } else if (x > 32767) {
30                 return 32767;
31         } else {
32                 return short(x);
33         }
34 }
35
36 struct hypothesis {
37         int id;
38         double cost;
39         hypothesis *prev;
40 };
41
42 int main(int argc, char **argv)
43 {
44         make_lanczos_weight_table();
45         std::vector<stereo_sample> pcm;
46
47         while (!feof(stdin)) {
48                 stereo_sample buf[BUFSIZE];
49                 ssize_t ret = fread(buf, sizeof(stereo_sample), BUFSIZE, stdin);
50                 if (ret >= 0) {
51                         pcm.insert(pcm.end(), buf, buf + ret);
52                 }
53         }
54
55         double sum_left = 0.0, sum_right = 0.0;
56         for (unsigned i = 0; i < pcm.size(); ++i) {
57                 sum_left += pcm[i].left;
58                 sum_right += pcm[i].right;
59         }
60         double mean_left = sum_left / pcm.size();
61         double mean_right = sum_right / pcm.size();
62
63         //fprintf(stderr, "Mean: L=%f R=%f\n", mean_left, mean_right);
64
65         double sum2_left = 0.0, sum2_right = 0.0;
66         for (unsigned i = 0; i < pcm.size(); ++i) {
67                 sum2_left += (pcm[i].left - mean_left) * (pcm[i].left - mean_left);
68                 sum2_right += (pcm[i].right - mean_right) * (pcm[i].right - mean_right);
69         }
70         double var_left = sum2_left / (pcm.size() - 1);
71         double var_right = sum2_right / (pcm.size() - 1);
72
73         //fprintf(stderr, "Stddev: L=%f R=%f\n", sqrt(var_left), sqrt(var_right));
74
75         double inv_sd_left = 1.0 / sqrt(var_left);
76         double inv_sd_right = 1.0 / sqrt(var_right);
77
78         std::vector<float_stereo_sample> norm;
79         norm.resize(pcm.size());
80
81         for (unsigned i = 0; i < pcm.size(); ++i) {
82                 norm[i].left = (pcm[i].left - mean_left) * inv_sd_left;
83                 norm[i].right = (pcm[i].right - mean_right) * inv_sd_right;
84         }
85
86 #if 0
87         double offset = 0.0;
88         double old_diff = 0.0;
89         for (unsigned i = 0; i < pcm.size(); ++i) {
90                 double left = (pcm[i].left - mean_left) * inv_sd_left;
91                 double right = (interpolate(pcm, i + offset) - mean_right) * inv_sd_right;
92
93                 double diff = right - left;
94                 old_diff = old_diff * 0.9999 + diff * 0.0001;
95                 offset -= 0.1 * old_diff;
96
97                 if (i % 100 == 0) {
98                         fprintf(stderr, "%7.3f: %7.3f [diff=%8.3f lagged diff=%8.3f]\n", i / 44100.0, offset, diff, old_diff);
99                 }
100                 printf("%f %f %f\n", i / 44100.0, left, right);
101         }
102 #endif
103
104         double delays[NUM_THEORIES];
105         std::vector<hypothesis *> alloc_hypot;
106         hypothesis *bases = new hypothesis[NUM_THEORIES];
107         alloc_hypot.push_back(bases);
108
109         for (int h = 0; h < NUM_THEORIES; ++h) {
110                 delays[h] = THEORY_FROM + h * (THEORY_TO - THEORY_FROM) / (NUM_THEORIES - 1);
111                 bases[h].id = h;
112                 bases[h].cost = 0.0;
113                 bases[h].prev = NULL;
114         }
115
116         fprintf(stderr, "Matching blocks... %7.2f", 0.0);
117         hypothesis *prev_hyp = bases;
118         size_t total_end = pcm.size();
119         //size_t total_end = 441000;
120         for (unsigned i = 0; i < total_end; i += BUFSIZE) {
121                 fprintf(stderr, "\b\b\b\b\b\b\b%7.2f", i / 44100.0);
122                 size_t end = std::min<size_t>(i + BUFSIZE, total_end);
123         
124                 hypothesis *hyp = new hypothesis[NUM_THEORIES];
125                 alloc_hypot.push_back(hyp);
126
127                 // evaluate all hypotheses
128                 for (int h = 0; h < NUM_THEORIES; ++h) {
129                         double sum = 0.0;
130                         double d = delays[h];
131                         for (unsigned s = i; s < end; ++s) {
132                                 double left = norm[s].left;
133                                 double right = linear_interpolate_right(norm, s + d);
134                                 double diff = (right - left) * (right - left);
135                                 sum += diff;
136                         }
137
138                         double best_cost = HUGE_VAL;
139                         hypothesis *best_prev = NULL;
140                         for (int hp = 0; hp < NUM_THEORIES; ++hp) {
141                                 double switch_cost = SWITCH_COST * fabs(delays[hp] - delays[h]);
142                                 double cost = prev_hyp[hp].cost + sum + switch_cost;
143                                 if (best_prev == NULL || cost < best_cost) {
144                                         best_cost = cost;
145                                         best_prev = &prev_hyp[hp];
146                                 }
147                         }
148
149                         hyp[h].id = h;
150                         hyp[h].cost = best_cost;
151                         hyp[h].prev = best_prev;
152                 }
153
154                 prev_hyp = hyp;
155         }
156         fprintf(stderr, "\b\b\b\b\b\b\b%7.2f\n", total_end / 44100.0);
157
158         // best winner
159         double best_cost = HUGE_VAL;
160         hypothesis *best_hyp = NULL;
161         for (int h = 0; h < NUM_THEORIES; ++h) {
162                 if (best_hyp == NULL || prev_hyp[h].cost < best_cost) {
163                         best_cost = prev_hyp[h].cost;
164                         best_hyp = &prev_hyp[h];
165                 }
166         }
167
168         // trace the path backwards
169         std::vector<double> best_path;
170         while (best_hyp != NULL) {
171                 best_path.push_back(delays[best_hyp->id]);
172                 best_hyp = best_hyp->prev;
173         }
174
175         reverse(best_path.begin(), best_path.end());
176
177         fprintf(stderr, "Writing misalignment graphs to misalignment.plot...\n");
178         FILE *fp = fopen("misalignment.plot", "w");
179         for (unsigned i = 0; i < best_path.size(); ++i) {
180                 fprintf(fp, "%f %f\n", i * BUFSIZE / 44100.0, best_path[i]);
181         }
182         fclose(fp);
183         
184         // save some RAM
185         norm = std::vector<float_stereo_sample>();
186         for (unsigned i = 0; i < alloc_hypot.size(); ++i) {
187                 delete[] alloc_hypot[i];
188         }
189
190         fprintf(stderr, "Stretching right channel to match left... %7.2f%%", 0.0);
191         double inv_sd = sqrt(2.0) / sqrt(var_left + var_right);
192         std::vector<stereo_sample> aligned_pcm;
193         std::vector<short> mono_pcm;
194         aligned_pcm.resize(total_end);
195         mono_pcm.resize(total_end);
196         for (unsigned i = 0; i < total_end; ++i) {
197                 double d = lanczos_interpolate(best_path, i / double(BUFSIZE));
198                 int left = pcm[i].left;
199                 int right = lanczos_interpolate_right(pcm, i + d);
200
201                 aligned_pcm[i].left = left;
202                 aligned_pcm[i].right = clip(right);
203
204                 mono_pcm[i] = clip(lrintf(inv_sd * 4096.0 * (left + right)));
205
206                 if (i % 4096 == 0) {
207                         fprintf(stderr, "\b\b\b\b\b\b\b\b%7.2f%%", 100.0 * i / total_end);
208                 }
209         }
210         fprintf(stderr, "\b\b\b\b\b\b\b%7.2f%%\n", 100.0);
211
212         fprintf(stderr, "Writing realigned stereo channels to aligned.raw...\n");
213         fp = fopen("aligned.raw", "wb");
214         fwrite(aligned_pcm.data(), sizeof(stereo_sample) * aligned_pcm.size(), 1, fp);
215         fclose(fp);
216
217         fprintf(stderr, "Writing combined mono track to combined.raw...\n");
218         fp = fopen("combined.raw", "wb");
219         fwrite(mono_pcm.data(), sizeof(short) * mono_pcm.size(), 1, fp);
220         fclose(fp);
221
222         fprintf(stderr, "All done.\n");
223
224 #if 0
225
226         for (int sec = 0; sec < 2400; ++sec) {
227                 double from_offset = -128.0;
228                 double to_offset = 128.0;
229                 double best_offset = HUGE_VAL;
230                 for (int iter = 0; iter < 5; ++iter) {  
231                         //printf("from %f to %f\n", from_offset, to_offset);
232                         double best_sum = HUGE_VAL;
233                 
234                         for (int k = 0; k < 100; ++k) {
235                                 double offset = from_offset + k * 0.01 * (to_offset - from_offset);
236                                 double sum = 0.0;
237                                 for (unsigned i = sec * 4410; i < sec * 4410 + 4410; ++i) {
238                                 //for (unsigned i = 100000; i < 10NUM_THEORIES0; ++i) {
239                                         double left = norm[i].left;
240                                         double right = interpolate(norm, i + offset);
241                                         //double right = (norm[i + offset].right - mean_right) * inv_sd_right;
242                                         double diff = (right - left) * (right - left);
243                                         sum += diff;
244                                 }
245                 
246                                 if (sum < best_sum) {
247                                         best_sum = sum;
248                                         best_offset = offset;
249                                 }
250
251                                 //printf("%f %f\n", offset, sqrt(sum / NUM_THEORIES00.0f));
252                                 //fflush(stdout);
253                         }
254                 
255                         double range = 0.1 * (to_offset - from_offset);
256                         from_offset = best_offset - range;
257                         to_offset = best_offset + range;
258                 }
259                 printf("%d %f\n", sec, best_offset);
260                 fflush(stdout);
261         }
262 #endif
263         
264
265 }