1 // A program that tries to sync up the two channels, using Viterbi decoding
2 // to find the most likely misalignment as it changes throughout the file.
4 #define NUM_THEORIES 200
5 #define THEORY_FROM -10.0 /* in samples */
6 #define THEORY_TO 10.0 /* in samples */
7 #define SWITCH_COST 1000.0 /* pretty arbitrary */
14 #define LANCZOS_RADIUS 30
17 struct stereo_sample {
20 struct double_stereo_sample {
24 inline short clip(int x)
28 } else if (x > 32767) {
38 return 1.0f - fabs(x);
45 double weight(double x)
47 if (fabs(x) > LANCZOS_RADIUS) {
50 return sinc(M_PI * x) * sinc(M_PI * x / LANCZOS_RADIUS);
53 double interpolate(const std::vector<double_stereo_sample> &pcm, double i)
55 int lower = std::max<int>(ceil(i - LANCZOS_RADIUS), 0);
56 int upper = std::min<int>(floor(i + LANCZOS_RADIUS), pcm.size() - 1);
59 for (int x = lower; x <= upper; ++x) {
60 sum += pcm[x].right * weight(i - x);
65 double weight(double x)
70 return 1.0f - fabs(x);
73 inline double interpolate(const std::vector<double> &pcm, double i)
76 if (ii < 0 || ii >= int(pcm.size() - 1)) {
81 return pcm[ii] + frac * (pcm[ii + 1] - pcm[ii]);
85 inline double interpolate(const std::vector<T> &pcm, double i)
88 if (ii < 0 || ii >= int(pcm.size() - 1)) {
93 return pcm[ii].right + frac * (pcm[ii + 1].right - pcm[ii].right);
103 int main(int argc, char **argv)
105 std::vector<stereo_sample> pcm;
107 while (!feof(stdin)) {
108 stereo_sample buf[BUFSIZE];
109 ssize_t ret = fread(buf, sizeof(stereo_sample), BUFSIZE, stdin);
111 pcm.insert(pcm.end(), buf, buf + ret);
115 double sum_left = 0.0, sum_right = 0.0;
116 for (unsigned i = 0; i < pcm.size(); ++i) {
117 sum_left += pcm[i].left;
118 sum_right += pcm[i].right;
120 double mean_left = sum_left / pcm.size();
121 double mean_right = sum_right / pcm.size();
123 //fprintf(stderr, "Mean: L=%f R=%f\n", mean_left, mean_right);
125 double sum2_left = 0.0, sum2_right = 0.0;
126 for (unsigned i = 0; i < pcm.size(); ++i) {
127 sum2_left += (pcm[i].left - mean_left) * (pcm[i].left - mean_left);
128 sum2_right += (pcm[i].right - mean_right) * (pcm[i].right - mean_right);
130 double var_left = sum2_left / (pcm.size() - 1);
131 double var_right = sum2_right / (pcm.size() - 1);
133 //fprintf(stderr, "Stddev: L=%f R=%f\n", sqrt(var_left), sqrt(var_right));
135 double inv_sd_left = 1.0 / sqrt(var_left);
136 double inv_sd_right = 1.0 / sqrt(var_right);
138 std::vector<double_stereo_sample> norm;
139 norm.resize(pcm.size());
141 for (unsigned i = 0; i < pcm.size(); ++i) {
142 norm[i].left = (pcm[i].left - mean_left) * inv_sd_left;
143 norm[i].right = (pcm[i].right - mean_right) * inv_sd_right;
148 double old_diff = 0.0;
149 for (unsigned i = 0; i < pcm.size(); ++i) {
150 double left = (pcm[i].left - mean_left) * inv_sd_left;
151 double right = (interpolate(pcm, i + offset) - mean_right) * inv_sd_right;
153 double diff = right - left;
154 old_diff = old_diff * 0.9999 + diff * 0.0001;
155 offset -= 0.1 * old_diff;
158 fprintf(stderr, "%7.3f: %7.3f [diff=%8.3f lagged diff=%8.3f]\n", i / 44100.0, offset, diff, old_diff);
160 printf("%f %f %f\n", i / 44100.0, left, right);
164 double delays[NUM_THEORIES];
165 hypothesis *bases = new hypothesis[NUM_THEORIES];
167 for (int h = 0; h < NUM_THEORIES; ++h) {
168 delays[h] = THEORY_FROM + h * (THEORY_TO - THEORY_FROM) / (NUM_THEORIES - 1);
171 bases[h].prev = NULL;
174 hypothesis *prev_hyp = bases;
175 size_t total_end = pcm.size();
176 //size_t total_end = 4410000;
177 for (unsigned i = 0; i < total_end; i += BUFSIZE) {
178 fprintf(stderr, "%.3f\n", i / 44100.0);
179 size_t end = std::min<size_t>(i + BUFSIZE, total_end);
181 hypothesis *hyp = new hypothesis[NUM_THEORIES];
183 // evaluate all hypotheses
184 for (int h = 0; h < NUM_THEORIES; ++h) {
186 double d = delays[h];
187 for (unsigned s = i; s < end; ++s) {
188 double left = norm[s].left;
189 double right = interpolate(norm, s + d);
190 double diff = (right - left) * (right - left);
194 double best_cost = HUGE_VAL;
195 hypothesis *best_prev = NULL;
196 for (int hp = 0; hp < NUM_THEORIES; ++hp) {
197 double switch_cost = SWITCH_COST * fabs(delays[hp] - delays[h]);
198 double cost = prev_hyp[hp].cost + sum + switch_cost;
199 if (best_prev == NULL || cost < best_cost) {
201 best_prev = &prev_hyp[hp];
206 hyp[h].cost = best_cost;
207 hyp[h].prev = best_prev;
214 double best_cost = HUGE_VAL;
215 hypothesis *best_hyp = NULL;
216 for (int h = 0; h < NUM_THEORIES; ++h) {
217 if (best_hyp == NULL || prev_hyp[h].cost < best_cost) {
218 best_cost = prev_hyp[h].cost;
219 best_hyp = &prev_hyp[h];
223 // trace the path backwards
224 std::vector<double> best_path;
225 while (best_hyp != NULL) {
226 best_path.push_back(delays[best_hyp->id]);
227 best_hyp = best_hyp->prev;
230 reverse(best_path.begin(), best_path.end());
232 fprintf(stderr, "Writing misalignment graphs to misalignment.plot...\n");
233 FILE *fp = fopen("misalignment.plot", "w");
234 for (unsigned i = 0; i < best_path.size(); ++i) {
235 fprintf(fp, "%f %f\n", i * BUFSIZE / 44100.0, best_path[i]);
239 // readjust and write out
240 double inv_sd = sqrt(2.0) / sqrt(var_left + var_right);
241 std::vector<stereo_sample> aligned_pcm;
242 std::vector<short> mono_pcm;
243 for (unsigned i = 0; i < total_end; ++i) {
244 double d = interpolate(best_path, i / double(BUFSIZE));
245 int left = pcm[i].left;
246 int right = interpolate(pcm, i + d);
250 ss.right = clip(right);
251 aligned_pcm.push_back(ss);
253 mono_pcm.push_back(clip(lrintf(inv_sd * 4096.0 * (left + right))));
256 fprintf(stderr, "Writing realigned stereo channels to aligned.raw...\n");
257 fp = fopen("aligned.raw", "wb");
258 fwrite(aligned_pcm.data(), sizeof(stereo_sample) * aligned_pcm.size(), 1, fp);
261 fprintf(stderr, "Writing combined mono track to combined.raw...\n");
262 fp = fopen("combined.raw", "wb");
263 fwrite(mono_pcm.data(), sizeof(short) * mono_pcm.size(), 1, fp);
266 fprintf(stderr, "All done.\n");
270 for (int sec = 0; sec < 2400; ++sec) {
271 double from_offset = -128.0;
272 double to_offset = 128.0;
273 double best_offset = HUGE_VAL;
274 for (int iter = 0; iter < 5; ++iter) {
275 //printf("from %f to %f\n", from_offset, to_offset);
276 double best_sum = HUGE_VAL;
278 for (int k = 0; k < 100; ++k) {
279 double offset = from_offset + k * 0.01 * (to_offset - from_offset);
281 for (unsigned i = sec * 4410; i < sec * 4410 + 4410; ++i) {
282 //for (unsigned i = 100000; i < 10NUM_THEORIES0; ++i) {
283 double left = norm[i].left;
284 double right = interpolate(norm, i + offset);
285 //double right = (norm[i + offset].right - mean_right) * inv_sd_right;
286 double diff = (right - left) * (right - left);
290 if (sum < best_sum) {
292 best_offset = offset;
295 //printf("%f %f\n", offset, sqrt(sum / NUM_THEORIES00.0f));
299 double range = 0.1 * (to_offset - from_offset);
300 from_offset = best_offset - range;
301 to_offset = best_offset + range;
303 printf("%d %f\n", sec, best_offset);