From b375322092639df22d3a79554ab530bf93ece7d1 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Sun, 11 Oct 2015 01:05:07 +0200
Subject: [PATCH] Add a resampler module, as start of sound support.

---
 Makefile      |   4 +-
 mixer.cpp     |  43 +++++++++++++++
 mixer.h       |   2 +
 resampler.cpp | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++
 resampler.h   |  90 ++++++++++++++++++++++++++++++++
 5 files changed, 278 insertions(+), 2 deletions(-)
 create mode 100644 resampler.cpp
 create mode 100644 resampler.h

diff --git a/Makefile b/Makefile
index 79d7bad..5d9e6ab 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,14 @@
 CXX=g++
 PKG_MODULES = Qt5Core Qt5Gui Qt5Widgets Qt5OpenGLExtensions Qt5OpenGL libusb-1.0 movit lua5.2
 CXXFLAGS := -O2 -march=native -g -std=gnu++11 -Wall -Wno-deprecated-declarations -fPIC $(shell pkg-config --cflags $(PKG_MODULES)) -pthread -DMOVIT_SHADER_DIR=\"$(shell pkg-config --variable=shaderdir movit)\"
-LDFLAGS=$(shell pkg-config --libs $(PKG_MODULES)) -lEGL -lGL -pthread -lva -lva-drm -lva-x11 -lX11 -lavformat -lavcodec -lavutil
+LDFLAGS=$(shell pkg-config --libs $(PKG_MODULES)) -lEGL -lGL -pthread -lva -lva-drm -lva-x11 -lX11 -lavformat -lavcodec -lavutil -lzita-resampler
 
 # Qt objects
 OBJS=glwidget.o main.o mainwindow.o window.o
 OBJS += glwidget.moc.o mainwindow.moc.o window.moc.o
 
 # Mixer objects
-OBJS += h264encode.o mixer.o bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o
+OBJS += h264encode.o mixer.o bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampler.o
 
 %.o: %.cpp
 	$(CXX) -MMD -MP $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $<
diff --git a/mixer.cpp b/mixer.cpp
index 73a476a..c126c0b 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -50,6 +50,24 @@ using namespace std::placeholders;
 
 Mixer *global_mixer = nullptr;
 
+namespace {
+
+void convert_fixed24_to_fp32(float *dst, size_t out_channels, const uint8_t *src, size_t in_channels, size_t num_samples)
+{
+	for (size_t i = 0; i < num_samples; ++i) {
+		for (size_t j = 0; j < out_channels; ++j) {
+			uint32_t s1 = *src++;
+			uint32_t s2 = *src++;
+			uint32_t s3 = *src++;
+			uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
+			dst[i * out_channels + j] = int(s) * (1.0f / 4294967296.0f);
+		}
+		src += 3 * (in_channels - out_channels);
+	}
+}
+
+}  // namespace
+
 Mixer::Mixer(const QSurfaceFormat &format)
 	: mixer_surface(create_surface(format)),
 	  h264_encoder_surface(create_surface(format))
@@ -100,6 +118,7 @@ Mixer::Mixer(const QSurfaceFormat &format)
 			[this]{ resource_pool->clean_context(); });
+		card->resampler = new Resampler(48000.0, 48000.0, 2);
 		card->usb->configure_card();
 	}
@@ -190,6 +209,30 @@ void Mixer::bm_frame(int card_index, uint16_t timecode,
 		card->new_data_ready_changed.notify_all();
 	}
 
+	// As a test of the resampler, send the data from card 0 through it and onto disk.
+	// TODO: Send the audio on, and encode it through ffmpeg.
+	if (card_index == 0) {
+		size_t num_samples = (audio_frame.len - audio_offset) / 8 / 3;
+		double pts = timecode / 60.0;  // FIXME: Unwrap. And rebase.
+		unique_ptr<float[]> samplesf(new float[num_samples * 2]);
+		convert_fixed24_to_fp32(samplesf.get(), 2, audio_frame.data + audio_offset, 8, num_samples);
+		card->resampler->add_input_samples(pts, samplesf.get(), num_samples);
+
+		float samples_out[(48000 / 60) * 2];
+		card->resampler->get_output_samples(pts, samples_out, 48000 / 60);
+
+		static FILE *audiofp = nullptr;
+		if (audiofp == nullptr) {
+			audiofp = fopen("audio.raw", "wb");
+		}
+		fwrite(samples_out, sizeof(samples_out), 1, audiofp);
+		//fwrite(samplesf.get(), num_samples * sizeof(float) * 2, 1, audiofp);
+
+		if (audio_frame.len - audio_offset != 19200) {
+			printf("%d: %d samples (%d bytes)\n", card_index, int(num_samples), int(audio_frame.len - audio_offset));
+		}
+	}
+
 	// Video frame will be released when last user of card->new_frame goes out of scope.
 	card->usb->get_audio_frame_allocator()->release_frame(audio_frame);
 }
diff --git a/mixer.h b/mixer.h
index b2561d1..f4ea5c9 100644
--- a/mixer.h
+++ b/mixer.h
@@ -15,6 +15,7 @@
 #include "ref_counted_frame.h"
 #include "ref_counted_gl_sync.h"
 #include "theme.h"
+#include "resampler.h"
 
 #define NUM_CARDS 2
 
@@ -116,6 +117,7 @@ private:
 		RefCountedFrame new_frame;
 		GLsync new_data_ready_fence;  // Whether new_frame is ready for rendering.
 		std::condition_variable new_data_ready_changed;  // Set whenever new_data_ready is changed.
+		Resampler *resampler = nullptr;
 	};
 	CaptureCard cards[NUM_CARDS];  // protected by
diff --git a/resampler.cpp b/resampler.cpp
new file mode 100644
index 0000000..5731465
--- /dev/null
+++ b/resampler.cpp
@@ -0,0 +1,141 @@
+// Parts of the code are adapted from Adriaensen's project Zita-ajbridge,
+// although it has been heavily reworked for this use case. Original copyright follows:
+//
+// Copyright (C) 2012-2015 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#include "resampler.h"
+
+#include <math.h>
+#include <stdio.h>
+
+Resampler::Resampler(unsigned freq_in, unsigned freq_out, unsigned num_channels)
+	: freq_in(freq_in), freq_out(freq_out), num_channels(num_channels),
+	  ratio(double(freq_out) / double(freq_in))
+{
+	vresampler.setup(ratio, num_channels, /*hlen=*/32);
+
+	// Prime the resampler so there's no more delay.
+	vresampler.inp_count = vresampler.inpsize() / 2 - 1;
+	vresampler.out_count = 1048576;
+	vresampler.process();
+}
+
+void Resampler::add_input_samples(double pts, const float *samples, ssize_t num_samples)
+{
+	if (first_input) {
+		// Synthesize a fake length.
+		last_input_len = double(num_samples) / freq_in;
+		first_input = false;
+	} else {
+		last_input_len = pts - last_input_pts;
+	}
+
+	last_input_pts = pts;
+
+	k_a0 = k_a1;
+	k_a1 += num_samples;
+
+	for (ssize_t i = 0; i < num_samples * num_channels; ++i) {
+		buffer.push_back(samples[i]);
+	}
+}
+
+void Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
+{
+	double last_output_len;
+	if (first_output) {
+		// Synthesize a fake length.
+		last_output_len = double(num_samples) / freq_out;
+	} else {
+		last_output_len = pts - last_output_pts;
+	}
+	last_output_pts = pts;
+
+	// Using the time point just before the last call to add_input_samples() as a base,
+	// estimate the actual delay based on activity since then, measured in number of input samples:
+	double actual_delay = 0.0;
+	actual_delay += (k_a1 - k_a0) * last_output_len / last_input_len;  // Inserted samples since k_a0, rescaled for the different time periods.
+	actual_delay += k_a0 - total_consumed_samples;  // Samples inserted before k_a0 but not consumed yet.
+	actual_delay += vresampler.inpdist();  // Delay in the resampler itself.
+	double err = actual_delay - expected_delay;
+	if (first_output && err < 0.0) {
+		// Before the very first block, insert artificial delay based on our initial estimate,
+		// so that we don't need a long period to stabilize at the beginning.
+		int delay_samples_to_add = lrintf(-err);
+		for (ssize_t i = 0; i < delay_samples_to_add * num_channels; ++i) {
+			buffer.push_front(0.0f);
+		}
+		total_consumed_samples -= delay_samples_to_add;  // Equivalent to increasing k_a0 and k_a1.
+		err += delay_samples_to_add;
+		first_output = false;
+	}
+
+	// Compute loop filter coefficients for the two filters. We need to compute them
+	// every time, since they depend on the number of samples the user asked for.
+	//
+	// The loop bandwidth starts at 1.0 Hz, then goes down to 0.05 Hz after four seconds.
+	double loop_bandwidth_hz = (k_a0 < 4 * freq_in) ? 1.0 : 0.05;
+
+	// Set the first filter much wider than the second one (20x as wide).
+	double w = (2.0 * M_PI) * 20.0 * loop_bandwidth_hz * num_samples / freq_out;
+	double w0 = 1.0 - exp(-w);
+
+	// Set the second filter.
+	w = (2.0 * M_PI) * loop_bandwidth_hz * ratio / freq_out;
+	double w1 = w * 1.6;
+	double w2 = w * num_samples / 1.6;
+
+	// Filter the error through the loop filter to find the correction ratio.
+	z1 += w0 * (w1 * err - z1);
+	z2 += w0 * (z1 - z2);
+	z3 += w2 * z2;
+	double rcorr = 1.0 - z2 - z3;
+	if (rcorr > 1.05) rcorr = 1.05;
+	if (rcorr < 0.95) rcorr = 0.95;
+	vresampler.set_rratio(rcorr);
+
+	// Finally actually resample, producing exactly num_samples output samples.
+	vresampler.out_data = samples;
+	vresampler.out_count = num_samples;
+	while (vresampler.out_count > 0) {
+		if (buffer.empty()) {
+			// This should never happen unless delay is set way too low,
+			// or we're dropping a lot of data.
+			fprintf(stderr, "PANIC: Out of input samples to resample, still need %d output samples!\n",
+				int(vresampler.out_count));
+			break;
+		}
+
+		float inbuf[1024];
+		size_t num_input_samples = sizeof(inbuf) / (sizeof(float) * num_channels);
+		if (num_input_samples * num_channels > buffer.size()) {
+			num_input_samples = buffer.size() / num_channels;
+		}
+		for (size_t i = 0; i < num_input_samples * num_channels; ++i) {
+			inbuf[i] = buffer[i];
+		}
+
+		vresampler.inp_count = num_input_samples;
+		vresampler.inp_data = inbuf;
+
+		vresampler.process();
+
+		size_t consumed_samples = num_input_samples - vresampler.inp_count;
+		total_consumed_samples += consumed_samples;
+		buffer.erase(buffer.begin(), buffer.begin() + consumed_samples * num_channels);
+	}
+}
diff --git a/resampler.h b/resampler.h
new file mode 100644
index 0000000..80ed1af
--- /dev/null
+++ b/resampler.h
@@ -0,0 +1,90 @@
+#ifndef _RESAMPLER_H
+#define _RESAMPLER_H 1
+
+// Takes in samples from an input source, possibly with jitter, and outputs a fixed number
+// of samples every iteration.
+// Used to a) change sample rates if needed, and b) deal with
+// input sources that don't have audio locked to video. For every input video
+// frame, you call add_input_samples() with the pts (measured in seconds) of the video frame,
+// taken to be the start point of the frame's audio. When you want to _output_ a finished
+// frame with audio, you call get_output_samples() with the number of samples you want, and
+// you will get exactly that number of samples back. If the input and output clocks are not
+// in sync, the audio will be stretched for you. (If they are _very_ out of sync, this will
+// come through as a pitch shift.) Of course, the process introduces some delay; you specify
+// a target delay (typically measured in milliseconds, although more is fine) and the
+// algorithm works to provide exactly that.
+//
+// A/V sync is a much harder problem than one would intuitively assume. This implementation
+// is based on a 2012 paper by Fons Adriaensen, “Controlling adaptive resampling”
+// (http://kokkinizita.linuxaudio.org/papers/adapt-resamp.pdf). The paper gives an algorithm
+// that converges to jitter of <100 ns; the basic idea is to measure the _rate_ at which the
+// input queue fills and is drained (as opposed to the length of the queue itself), and to
+// smoothly adjust the resampling rate so that it reaches steady state at the desired delay.
+//
+// Parts of the code are adapted from Adriaensen's project Zita-ajbridge (based on the same
+// algorithm), although it has been heavily reworked for this use case. Original copyright follows:
+//
+// Copyright (C) 2012-2015 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#include <sys/types.h>
+#include <deque>
+
+#include <zita-resampler/vresampler.h>
+
+class Resampler {
+public:
+	Resampler(unsigned freq_in, unsigned freq_out, unsigned num_channels = 2);
+
+	// Note: pts is always in seconds.
+	void add_input_samples(double pts, const float *samples, ssize_t num_samples);
+	void get_output_samples(double pts, float *samples, ssize_t num_samples);
+
+private:
+	void init_loop_filter(double bandwidth_hz);
+
+	VResampler vresampler;
+
+	unsigned freq_in, freq_out, num_channels;
+
+	bool first_input = true, first_output = true;
+	double last_input_pts;  // Start of last input block, in seconds.
+	double last_output_pts;
+
+	ssize_t k_a0 = 0;  // Total number of samples inserted _before_ the last call to add_input_samples().
+	ssize_t k_a1 = 0;  // Total number of samples inserted _after_ the last call to add_input_samples().
+
+	ssize_t total_consumed_samples = 0;
+
+	// Duration of last input block, in seconds.
+	double last_input_len;
+
+	// Filter state for the loop filter.
+	double z1 = 0.0, z2 = 0.0, z3 = 0.0;
+
+	// Ratio between the two frequencies.
+	double ratio;
+
+	// How much delay we are expected to have, in input samples.
+	// If the actual delay drifts too far away from this, we will start
+	// changing the resampling ratio to compensate.
+	double expected_delay = 4800.0;
+
+	// Input samples not yet fed into the resampler.
+	// TODO: Use a circular buffer instead, for efficiency.
+	std::deque<float> buffer;
+};
+
+#endif  // !defined(_RESAMPLER_H)
-- 
2.39.2
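
A note on intended usage (not part of the patch): the sketch below shows how the Resampler
class added above is meant to be driven, mirroring the constructor call in Mixer::Mixer()
and the test path in Mixer::bm_frame(): one add_input_samples() call per video frame with
that frame's pts in seconds, then one get_output_samples() call asking for exactly one
frame's worth of audio. The main() wrapper, the 440 Hz sine input, the fixed 60 fps clock
and the ten-second loop are illustrative assumptions only; the patch itself feeds real
capture-card audio and dumps the result to audio.raw.

// Hypothetical standalone driver for the Resampler class from this patch.
// Assumes 48 kHz stereo in and out and a 60 fps video clock, as in mixer.cpp.
#include <math.h>
#include <stdio.h>
#include <vector>

#include "resampler.h"

int main()
{
	const unsigned freq = 48000, channels = 2, fps = 60;
	const size_t samples_per_frame = freq / fps;  // 800 samples per video frame.

	// Same construction as in Mixer::Mixer(): 48 kHz in, 48 kHz out, stereo.
	Resampler resampler(freq, freq, channels);

	std::vector<float> out(samples_per_frame * channels);
	for (unsigned frame = 0; frame < 600; ++frame) {  // Ten seconds of synthetic frames.
		// Fake input block: a 440 Hz sine, pretending it arrived together with this
		// video frame. A real card would deliver a slightly jittery number of samples.
		std::vector<float> in(samples_per_frame * channels);
		for (size_t i = 0; i < samples_per_frame; ++i) {
			float t = float(frame * samples_per_frame + i) / freq;
			float s = sinf(2.0f * float(M_PI) * 440.0f * t);
			in[i * channels + 0] = s;
			in[i * channels + 1] = s;
		}

		// pts is in seconds, taken as the start point of this frame's audio.
		double pts = double(frame) / fps;
		resampler.add_input_samples(pts, in.data(), samples_per_frame);

		// Ask for exactly one frame's worth of output. Internally, the loop filter
		// nudges the resampling ratio so that the queue settles at expected_delay
		// (4800 input samples, i.e. 100 ms at 48 kHz).
		resampler.get_output_samples(pts, out.data(), samples_per_frame);

		// out now always holds exactly 800 stereo samples, regardless of input
		// jitter; encode them, write them to disk, etc.
	}

	fprintf(stderr, "done\n");
	return 0;
}

Because get_output_samples() always returns exactly the requested number of samples, the
caller never has to handle short reads; the hardcoded expected_delay of 4800 input samples
is the headroom that keeps the PANIC underrun path from being hit under normal jitter.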