OBJS += glwidget.moc.o mainwindow.moc.o vumeter.moc.o lrameter.moc.o correlation_meter.moc.o aboutdialog.moc.o ellipsis_label.moc.o
# Mixer objects
-OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampling_queue.o httpd.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o filter.o alsa_output.o correlation_measurer.o disk_space_estimator.o
+OBJS += mixer.o audio_mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampling_queue.o httpd.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o filter.o alsa_output.o correlation_measurer.o disk_space_estimator.o
# Streaming and encoding objects
OBJS += quicksync_encoder.o x264_encoder.o x264_speed_control.o video_encoder.o metacube2.o mux.o audio_encoder.o ffmpeg_raii.o
--- /dev/null
+#include "audio_mixer.h"
+
+#include <assert.h>
+#include <endian.h>
+#include <bmusb/bmusb.h>
+#include <stdio.h>
+#include <cmath>
+
+#include "flags.h"
+#include "timebase.h"
+
+using namespace bmusb;
+using namespace std;
+
+namespace {
+
+// Expands interleaved 24-bit fixed-point samples (three bytes each, lowest
+// byte first) into floats scaled by 2^-31, keeping only the first
+// out_channels channels of every in_channels-wide input frame.
+void convert_fixed24_to_fp32(float *dst, size_t out_channels, const uint8_t *src, size_t in_channels, size_t num_samples)
+{
+ assert(in_channels >= out_channels);
+ const float scale = 1.0f / 2147483648.0f;
+ const size_t skipped_bytes = 3 * (in_channels - out_channels);
+ float *out = dst;
+ for (size_t frame = 0; frame < num_samples; ++frame) {
+ for (size_t ch = 0; ch < out_channels; ++ch) {
+ const uint32_t lo = src[0];
+ const uint32_t mid = src[1];
+ const uint32_t hi = src[2];
+ src += 3;
+ // The sample fills the top 24 bits; the lowest byte is repeated in bits 0-7.
+ const uint32_t widened = lo | (lo << 8) | (mid << 16) | (hi << 24);
+ *out++ = int(widened) * scale;
+ }
+ // Step over the channels that are not wanted in the output.
+ src += skipped_bytes;
+ }
+}
+
+// Converts interleaved little-endian 32-bit fixed-point samples into floats
+// scaled by 2^-31, keeping only the first out_channels channels of every
+// in_channels-wide input frame.
+void convert_fixed32_to_fp32(float *dst, size_t out_channels, const uint8_t *src, size_t in_channels, size_t num_samples)
+{
+ assert(in_channels >= out_channels);
+ for (size_t i = 0; i < num_samples; ++i) {
+ for (size_t j = 0; j < out_channels; ++j) {
+ // src is a byte pointer with no alignment guarantee, so assemble the
+ // little-endian word from its bytes instead of dereferencing it as
+ // an int32_t. This also makes the result independent of host byte order.
+ const uint32_t raw = uint32_t(src[0]) | (uint32_t(src[1]) << 8) | (uint32_t(src[2]) << 16) | (uint32_t(src[3]) << 24);
+ const int32_t s = int32_t(raw);
+ dst[i * out_channels + j] = s * (1.0f / 2147483648.0f);
+ src += 4;
+ }
+ // Step over the channels that are not wanted in the output.
+ src += 4 * (in_channels - out_channels);
+ }
+}
+
+} // namespace
+
+// Constructs the three compressors for OUTPUT_FREQUENCY, initializes the
+// low-cut filter and applies the initial settings from global_flags.
+// No per-card resampling queue is created here; that is done by reset_card().
+AudioMixer::AudioMixer(unsigned num_cards)
+ : num_cards(num_cards),
+ level_compressor(OUTPUT_FREQUENCY),
+ limiter(OUTPUT_FREQUENCY),
+ compressor(OUTPUT_FREQUENCY)
+{
+ locut.init(FILTER_HPF, 2);
+
+ set_locut_enabled(global_flags.locut_enabled);
+ // set_gain_staging_db() also switches automatic gain staging off, so
+ // set_gain_staging_auto() has to come after it to take effect.
+ set_gain_staging_db(global_flags.initial_gain_staging_db);
+ set_gain_staging_auto(global_flags.gain_staging_auto);
+ set_compressor_enabled(global_flags.compressor_enabled);
+ set_limiter_enabled(global_flags.limiter_enabled);
+ set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
+}
+
+// Gives the card a brand new resampling queue (discarding the old one, if
+// any) and restarts its local pts from zero.
+void AudioMixer::reset_card(unsigned card_index)
+{
+ CaptureCard &card = cards[card_index];
+ lock_guard<mutex> guard(card.audio_mutex);
+
+ card.resampling_queue.reset(new ResamplingQueue(card_index, OUTPUT_FREQUENCY, OUTPUT_FREQUENCY, 2));
+ card.next_local_pts = 0;
+}
+
+// Converts one frame of captured audio to interleaved stereo fp32 and queues
+// it on the given card's resampler.
+//
+// data holds num_samples sample frames of audio_format.num_channels channels
+// each, at audio_format.bits_per_sample (24 or 32) bits per sample; only the
+// first two channels are used. frame_length is in TIMEBASE units and is how
+// far the card's local pts is advanced afterwards.
+//
+// NOTE(review): when num_samples == 0 this queues no samples but still
+// advances the pts by frame_length; the code this was moved from padded such
+// frames with one frame of silence instead -- confirm this is intended.
+void AudioMixer::add_audio(unsigned card_index, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, int64_t frame_length)
+{
+ CaptureCard *card = &cards[card_index];
+
+ // Convert the audio to stereo fp32.
+ // Note: use data() rather than &audio[0] below; the vector is empty when
+ // num_samples == 0, and indexing an empty vector is undefined behavior.
+ vector<float> audio;
+ audio.resize(num_samples * 2);
+ switch (audio_format.bits_per_sample) {
+ case 0:
+ assert(num_samples == 0);
+ break;
+ case 24:
+ convert_fixed24_to_fp32(audio.data(), 2, data, audio_format.num_channels, num_samples);
+ break;
+ case 32:
+ convert_fixed32_to_fp32(audio.data(), 2, data, audio_format.num_channels, num_samples);
+ break;
+ default:
+ fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
+ assert(false);
+ }
+
+ // Now add it.
+ {
+ unique_lock<mutex> lock(card->audio_mutex);
+
+ int64_t local_pts = card->next_local_pts;
+ card->resampling_queue->add_input_samples(local_pts / double(TIMEBASE), audio.data(), num_samples);
+ card->next_local_pts = local_pts + frame_length;
+ }
+}
+
+// Queues num_frames frames of silence, each samples_per_frame stereo samples
+// long, on the given card, advancing its local pts by frame_length (in
+// TIMEBASE units) per frame.
+void AudioMixer::add_silence(unsigned card_index, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length)
+{
+ CaptureCard &card = cards[card_index];
+ lock_guard<mutex> guard(card.audio_mutex);
+
+ // One frame's worth of interleaved stereo zeros, reused for every frame.
+ vector<float> zeros(samples_per_frame * 2, 0.0f);
+ for (unsigned frames_left = num_frames; frames_left > 0; --frames_left) {
+ const double pts_seconds = card.next_local_pts / double(TIMEBASE);
+ card.resampling_queue->add_input_samples(pts_seconds, zeros.data(), samples_per_frame);
+ // The format may have changed while frames were being dropped, but
+ // that cannot be detected here, so every frame is assumed to have
+ // the same length.
+ card.next_local_pts += frame_length;
+ }
+}
+
+// Pulls num_samples stereo sample frames at the given pts from every card's
+// resampling queue, sums them, and runs the sum through the processing chain
+// (low-cut filter, level compressor or fixed gain, compressor, limiter, final
+// makeup gain). Returns the interleaved stereo result, num_samples * 2 floats.
+vector<float> AudioMixer::get_output(double pts, unsigned num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
+{
+ vector<float> samples_card;
+ vector<float> samples_out;
+ samples_out.resize(num_samples * 2);
+
+ // TODO: Allow more flexible input mapping.
+ for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
+ samples_card.resize(num_samples * 2);
+ {
+ unique_lock<mutex> lock(cards[card_index].audio_mutex);
+ cards[card_index].resampling_queue->get_output_samples(
+ pts,
+ &samples_card[0],
+ num_samples,
+ rate_adjustment_policy);
+ }
+ if (card_index == 0) {
+ for (unsigned i = 0; i < num_samples * 2; ++i) {
+ samples_out[i] = samples_card[i];
+ }
+ } else {
+ for (unsigned i = 0; i < num_samples * 2; ++i) {
+ samples_out[i] += samples_card[i];
+ }
+ }
+ }
+
+ // Cut away everything under 120 Hz (or whatever the cutoff is);
+ // we don't need it for voice, and it will reduce headroom
+ // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
+ // should be dampened.)
+ if (locut_enabled) {
+ locut.render(samples_out.data(), samples_out.size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
+ }
+
+ {
+ unique_lock<mutex> lock(compressor_mutex);
+
+ // Apply a level compressor to get the general level right.
+ // Basically, if it's over about -40 dBFS, we squeeze it down to that level
+ // (or more precisely, near it, since we don't use infinite ratio),
+ // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
+ // entirely arbitrary, but from practical tests with speech, it seems to
+ // put us around -23 LUFS, so it's a reasonable starting point for later use.
+ {
+ if (level_compressor_enabled) {
+ float threshold = 0.01f; // -40 dBFS.
+ float ratio = 20.0f;
+ float attack_time = 0.5f;
+ float release_time = 20.0f;
+ float makeup_gain = pow(10.0f, (ref_level_dbfs - (-40.0f)) / 20.0f); // +26 dB.
+ level_compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+ gain_staging_db = 20.0 * log10(level_compressor.get_attenuation() * makeup_gain);
+ } else {
+ // Just apply the gain we already had.
+ float g = pow(10.0f, gain_staging_db / 20.0f);
+ for (size_t i = 0; i < samples_out.size(); ++i) {
+ samples_out[i] *= g;
+ }
+ }
+ }
+
+ #if 0
+ printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
+ level_compressor.get_level(), 20.0 * log10(level_compressor.get_level()),
+ level_compressor.get_attenuation(), 20.0 * log10(level_compressor.get_attenuation()),
+ 20.0 * log10(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
+ #endif
+
+ // float limiter_att, compressor_att;
+
+ // The real compressor.
+ if (compressor_enabled) {
+ float threshold = pow(10.0f, compressor_threshold_dbfs / 20.0f);
+ float ratio = 20.0f;
+ float attack_time = 0.005f;
+ float release_time = 0.040f;
+ float makeup_gain = 2.0f; // +6 dB.
+ compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+ // compressor_att = compressor.get_attenuation();
+ }
+
+ // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
+ // Note that since ratio is not infinite, we could go slightly higher than this.
+ if (limiter_enabled) {
+ float threshold = pow(10.0f, limiter_threshold_dbfs / 20.0f);
+ float ratio = 30.0f;
+ float attack_time = 0.0f; // Instant.
+ float release_time = 0.020f;
+ float makeup_gain = 1.0f; // 0 dB.
+ limiter.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+ // limiter_att = limiter.get_attenuation();
+ }
+
+ // printf("limiter=%+5.1f compressor=%+5.1f\n", 20.0*log10(limiter_att), 20.0*log10(compressor_att));
+ }
+
+ // At this point, we are most likely close to +0 LU, but all of our
+ // measurements have been on raw sample values, not R128 values.
+ // So we have a final makeup gain to get us to +0 LU; the gain
+ // adjustments required should be relatively small, and also, the
+ // offset shouldn't change much (only if the type of audio changes
+ // significantly). Thus, we shoot for updating this value basically
+ // “whenever we process buffers”, since the R128 calculation isn't exactly
+ // something we get out per-sample.
+ //
+ // Note that there's a feedback loop here, so we choose a very slow filter
+ // (half-time of 100 seconds).
+ {
+ // final_makeup_gain and final_makeup_gain_auto are shared with the
+ // setters and getters, so the whole read-modify-write sequence,
+ // including the decision whether to adapt, runs under compressor_mutex.
+ unique_lock<mutex> lock(compressor_mutex);
+
+ double loudness_lu = loudness_lufs - ref_level_lufs;
+ double current_makeup_lu = 20.0f * log10(final_makeup_gain);
+ double target_loudness_factor = pow(10.0f, -loudness_lu / 20.0f);
+
+ // If we're outside +/- 5 LU uncorrected, we don't count it as
+ // a normal signal (probably silence) and don't change the
+ // correction factor; just apply what we already have.
+ double alpha;
+ if (fabs(loudness_lu - current_makeup_lu) >= 5.0 || !final_makeup_gain_auto) {
+ alpha = 0.0;
+ } else {
+ // Formula adapted from
+ // https://en.wikipedia.org/wiki/Low-pass_filter#Simple_infinite_impulse_response_filter.
+ const double half_time_s = 100.0;
+ const double fc_mul_2pi_delta_t = 1.0 / (half_time_s * OUTPUT_FREQUENCY);
+ alpha = fc_mul_2pi_delta_t / (fc_mul_2pi_delta_t + 1.0);
+ }
+
+ // Apply the gain per stereo sample pair, letting it drift towards
+ // the target as we go.
+ double m = final_makeup_gain;
+ for (size_t i = 0; i < samples_out.size(); i += 2) {
+ samples_out[i + 0] *= m;
+ samples_out[i + 1] *= m;
+ m += (target_loudness_factor - m) * alpha;
+ }
+ final_makeup_gain = m;
+ }
+
+ return samples_out;
+}
--- /dev/null
+#ifndef _AUDIO_MIXER_H
+#define _AUDIO_MIXER_H 1
+
+// The audio mixer, dealing with extracting the right signals from
+// each capture card, resampling signals so that they are in sync,
+// processing them with effects (if desired), and then mixing them
+// all together into one final audio signal.
+//
+// All operations on AudioMixer (except destruction) are thread-safe.
+//
+// TODO: There might be more audio stuff that should be moved here
+// from Mixer.
+
+#include <math.h>
+#include <stdint.h>
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "bmusb/bmusb.h"
+#include "defs.h"
+#include "filter.h"
+#include "resampling_queue.h"
+#include "stereocompressor.h"
+
+namespace bmusb {
+struct AudioFormat;
+} // namespace bmusb
+
+class AudioMixer {
+public:
+ // Applies the initial settings from global_flags. Every card in
+ // [0, num_cards) must be given a resampling queue with reset_card()
+ // before audio is added to it or get_output() is called.
+ AudioMixer(unsigned num_cards);
+
+ // Replaces the card's resampling queue with a fresh one and restarts
+ // its local pts from zero.
+ void reset_card(unsigned card_index);
+
+ // frame_length is in TIMEBASE units.
+ void add_audio(unsigned card_index, const uint8_t *data, unsigned num_samples, bmusb::AudioFormat audio_format, int64_t frame_length);
+ void add_silence(unsigned card_index, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length);
+
+ // Returns num_samples interleaved stereo samples (num_samples * 2 floats),
+ // mixed from all cards and run through the processing chain.
+ std::vector<float> get_output(double pts, unsigned num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy);
+
+ // Feeds in the measured loudness (in LUFS) that steers the automatic
+ // final makeup gain. See comments inside get_output().
+ void set_current_loudness(double level_lufs) { loudness_lufs = level_lufs; }
+
+ void set_locut_cutoff(float cutoff_hz)
+ {
+ locut_cutoff_hz = cutoff_hz;
+ }
+
+ void set_locut_enabled(bool enabled)
+ {
+ locut_enabled = enabled;
+ }
+
+ bool get_locut_enabled() const
+ {
+ return locut_enabled;
+ }
+
+ float get_limiter_threshold_dbfs() const
+ {
+ return limiter_threshold_dbfs;
+ }
+
+ float get_compressor_threshold_dbfs() const
+ {
+ return compressor_threshold_dbfs;
+ }
+
+ void set_limiter_threshold_dbfs(float threshold_dbfs)
+ {
+ limiter_threshold_dbfs = threshold_dbfs;
+ }
+
+ void set_compressor_threshold_dbfs(float threshold_dbfs)
+ {
+ compressor_threshold_dbfs = threshold_dbfs;
+ }
+
+ void set_limiter_enabled(bool enabled)
+ {
+ limiter_enabled = enabled;
+ }
+
+ bool get_limiter_enabled() const
+ {
+ return limiter_enabled;
+ }
+
+ void set_compressor_enabled(bool enabled)
+ {
+ compressor_enabled = enabled;
+ }
+
+ bool get_compressor_enabled() const
+ {
+ return compressor_enabled;
+ }
+
+ // Sets a fixed gain staging value. Note that this also turns automatic
+ // gain staging off.
+ void set_gain_staging_db(float gain_db)
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ level_compressor_enabled = false;
+ gain_staging_db = gain_db;
+ }
+
+ // While automatic gain staging is on, get_output() keeps overwriting
+ // this with the gain the level compressor ended up applying.
+ float get_gain_staging_db() const
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ return gain_staging_db;
+ }
+
+ void set_gain_staging_auto(bool enabled)
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ level_compressor_enabled = enabled;
+ }
+
+ bool get_gain_staging_auto() const
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ return level_compressor_enabled;
+ }
+
+ // Sets a fixed final makeup gain. Note that this also turns the
+ // automatic final makeup gain off.
+ void set_final_makeup_gain_db(float gain_db)
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ final_makeup_gain_auto = false;
+ final_makeup_gain = pow(10.0f, gain_db / 20.0f);
+ }
+
+ // const like the other getters; compressor_mutex is mutable.
+ float get_final_makeup_gain_db() const
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ return 20.0 * log10(final_makeup_gain);
+ }
+
+ void set_final_makeup_gain_auto(bool enabled)
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ final_makeup_gain_auto = enabled;
+ }
+
+ bool get_final_makeup_gain_auto() const
+ {
+ std::unique_lock<std::mutex> lock(compressor_mutex);
+ return final_makeup_gain_auto;
+ }
+
+private:
+ unsigned num_cards;
+
+ struct CaptureCard {
+ std::mutex audio_mutex;
+ std::unique_ptr<ResamplingQueue> resampling_queue; // Under audio_mutex.
+ int64_t next_local_pts = 0; // Beginning of next frame, in TIMEBASE units. Under audio_mutex.
+ };
+ CaptureCard cards[MAX_CARDS];
+
+ StereoFilter locut; // Default cutoff 120 Hz, 24 dB/oct.
+ // Initialized explicitly: a default-constructed std::atomic<float> holds
+ // an indeterminate value, which get_output() would otherwise read if it
+ // ran before the first set_locut_cutoff() call.
+ std::atomic<float> locut_cutoff_hz{120.0f};
+ std::atomic<bool> locut_enabled{true};
+
+ // First compressor; takes us up to about -12 dBFS.
+ mutable std::mutex compressor_mutex;
+ StereoCompressor level_compressor; // Under compressor_mutex. Used to set/override gain_staging_db if <level_compressor_enabled>.
+ float gain_staging_db = 0.0f; // Under compressor_mutex.
+ bool level_compressor_enabled = true; // Under compressor_mutex.
+
+ static constexpr float ref_level_dbfs = -14.0f; // Chosen so that we end up around 0 LU in practice.
+ static constexpr float ref_level_lufs = -23.0f; // 0 LU, more or less by definition.
+
+ std::atomic<float> loudness_lufs{ref_level_lufs};
+
+ StereoCompressor limiter;
+ std::atomic<float> limiter_threshold_dbfs{ref_level_dbfs + 4.0f}; // 4 dB.
+ std::atomic<bool> limiter_enabled{true};
+ StereoCompressor compressor;
+ std::atomic<float> compressor_threshold_dbfs{ref_level_dbfs - 12.0f}; // -12 dB.
+ std::atomic<bool> compressor_enabled{true};
+
+ double final_makeup_gain = 1.0; // Under compressor_mutex. Read/write by the user. Note: Not in dB, we want the numeric precision so that we can change it slowly.
+ bool final_makeup_gain_auto = true; // Under compressor_mutex.
+};
+
+#endif // !defined(_AUDIO_MIXER_H)
// TODO: Fetch all of the values these for completeness,
// not just the enable knobs implied by flags.
- ui->locut_enabled->setChecked(global_mixer->get_locut_enabled());
- ui->gainstaging_knob->setValue(global_mixer->get_gain_staging_db());
- ui->gainstaging_auto_checkbox->setChecked(global_mixer->get_gain_staging_auto());
- ui->compressor_enabled->setChecked(global_mixer->get_compressor_enabled());
- ui->limiter_enabled->setChecked(global_mixer->get_limiter_enabled());
- ui->makeup_gain_auto_checkbox->setChecked(global_mixer->get_final_makeup_gain_auto());
+ ui->locut_enabled->setChecked(global_mixer->get_audio_mixer()->get_locut_enabled());
+ ui->gainstaging_knob->setValue(global_mixer->get_audio_mixer()->get_gain_staging_db());
+ ui->gainstaging_auto_checkbox->setChecked(global_mixer->get_audio_mixer()->get_gain_staging_auto());
+ ui->compressor_enabled->setChecked(global_mixer->get_audio_mixer()->get_compressor_enabled());
+ ui->limiter_enabled->setChecked(global_mixer->get_audio_mixer()->get_limiter_enabled());
+ ui->makeup_gain_auto_checkbox->setChecked(global_mixer->get_audio_mixer()->get_final_makeup_gain_auto());
ui->limiter_threshold_db_display->setText(
- QString::fromStdString(format_db(mixer->get_limiter_threshold_dbfs(), DB_WITH_SIGN)));
+ QString::fromStdString(format_db(mixer->get_audio_mixer()->get_limiter_threshold_dbfs(), DB_WITH_SIGN)));
ui->compressor_threshold_db_display->setText(
- QString::fromStdString(format_db(mixer->get_compressor_threshold_dbfs(), DB_WITH_SIGN)));
+ QString::fromStdString(format_db(mixer->get_audio_mixer()->get_compressor_threshold_dbfs(), DB_WITH_SIGN)));
connect(ui->locut_cutoff_knob, &QDial::valueChanged, this, &MainWindow::cutoff_knob_changed);
cutoff_knob_changed(ui->locut_cutoff_knob->value());
connect(ui->locut_enabled, &QCheckBox::stateChanged, [this](int state){
- global_mixer->set_locut_enabled(state == Qt::Checked);
+ global_mixer->get_audio_mixer()->set_locut_enabled(state == Qt::Checked);
});
connect(ui->gainstaging_knob, &QAbstractSlider::valueChanged, this, &MainWindow::gain_staging_knob_changed);
connect(ui->gainstaging_auto_checkbox, &QCheckBox::stateChanged, [this](int state){
- global_mixer->set_gain_staging_auto(state == Qt::Checked);
+ global_mixer->get_audio_mixer()->set_gain_staging_auto(state == Qt::Checked);
});
connect(ui->makeup_gain_knob, &QAbstractSlider::valueChanged, this, &MainWindow::final_makeup_gain_knob_changed);
connect(ui->makeup_gain_auto_checkbox, &QCheckBox::stateChanged, [this](int state){
- global_mixer->set_final_makeup_gain_auto(state == Qt::Checked);
+ global_mixer->get_audio_mixer()->set_final_makeup_gain_auto(state == Qt::Checked);
});
connect(ui->limiter_threshold_knob, &QDial::valueChanged, this, &MainWindow::limiter_threshold_knob_changed);
connect(ui->compressor_threshold_knob, &QDial::valueChanged, this, &MainWindow::compressor_threshold_knob_changed);
connect(ui->limiter_enabled, &QCheckBox::stateChanged, [this](int state){
- global_mixer->set_limiter_enabled(state == Qt::Checked);
+ global_mixer->get_audio_mixer()->set_limiter_enabled(state == Qt::Checked);
});
connect(ui->compressor_enabled, &QCheckBox::stateChanged, [this](int state){
- global_mixer->set_compressor_enabled(state == Qt::Checked);
+ global_mixer->get_audio_mixer()->set_compressor_enabled(state == Qt::Checked);
});
connect(ui->reset_meters_button, &QPushButton::clicked, this, &MainWindow::reset_meters_button_clicked);
mixer->set_audio_level_callback(bind(&MainWindow::audio_level_callback, this, _1, _2, _3, _4, _5, _6, _7, _8));
ui->gainstaging_auto_checkbox->setCheckState(Qt::Unchecked);
float gain_db = value * 0.1f;
- global_mixer->set_gain_staging_db(gain_db);
+ global_mixer->get_audio_mixer()->set_gain_staging_db(gain_db);
// The label will be updated by the audio level callback.
}
ui->makeup_gain_auto_checkbox->setCheckState(Qt::Unchecked);
float gain_db = value * 0.1f;
- global_mixer->set_final_makeup_gain_db(gain_db);
+ global_mixer->get_audio_mixer()->set_final_makeup_gain_db(gain_db);
// The label will be updated by the audio level callback.
}
{
float octaves = value * 0.1f;
float cutoff_hz = 20.0 * pow(2.0, octaves);
- global_mixer->set_locut_cutoff(cutoff_hz);
+ global_mixer->get_audio_mixer()->set_locut_cutoff(cutoff_hz);
char buf[256];
snprintf(buf, sizeof(buf), "%ld Hz", lrintf(cutoff_hz));
void MainWindow::limiter_threshold_knob_changed(int value)
{
float threshold_dbfs = value * 0.1f;
- global_mixer->set_limiter_threshold_dbfs(threshold_dbfs);
+ global_mixer->get_audio_mixer()->set_limiter_threshold_dbfs(threshold_dbfs);
ui->limiter_threshold_db_display->setText(
QString::fromStdString(format_db(threshold_dbfs, DB_WITH_SIGN)));
}
void MainWindow::compressor_threshold_knob_changed(int value)
{
float threshold_dbfs = value * 0.1f;
- global_mixer->set_compressor_threshold_dbfs(threshold_dbfs);
+ global_mixer->get_audio_mixer()->set_compressor_threshold_dbfs(threshold_dbfs);
ui->compressor_threshold_db_display->setText(
QString::fromStdString(format_db(threshold_dbfs, DB_WITH_SIGN)));
}
namespace {
-void convert_fixed24_to_fp32(float *dst, size_t out_channels, const uint8_t *src, size_t in_channels, size_t num_samples)
-{
- assert(in_channels >= out_channels);
- for (size_t i = 0; i < num_samples; ++i) {
- for (size_t j = 0; j < out_channels; ++j) {
- uint32_t s1 = *src++;
- uint32_t s2 = *src++;
- uint32_t s3 = *src++;
- uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
- dst[i * out_channels + j] = int(s) * (1.0f / 2147483648.0f);
- }
- src += 3 * (in_channels - out_channels);
- }
-}
-
-void convert_fixed32_to_fp32(float *dst, size_t out_channels, const uint8_t *src, size_t in_channels, size_t num_samples)
-{
- assert(in_channels >= out_channels);
- for (size_t i = 0; i < num_samples; ++i) {
- for (size_t j = 0; j < out_channels; ++j) {
- int32_t s = le32toh(*(int32_t *)src);
- dst[i * out_channels + j] = s * (1.0f / 2147483648.0f);
- src += 4;
- }
- src += 4 * (in_channels - out_channels);
- }
-}
-
void insert_new_frame(RefCountedFrame frame, unsigned field_num, bool interlaced, unsigned card_index, InputState *input_state)
{
if (interlaced) {
num_cards(num_cards),
mixer_surface(create_surface(format)),
h264_encoder_surface(create_surface(format)),
- correlation(OUTPUT_FREQUENCY),
- level_compressor(OUTPUT_FREQUENCY),
- limiter(OUTPUT_FREQUENCY),
- compressor(OUTPUT_FREQUENCY)
+ audio_mixer(num_cards),
+ correlation(OUTPUT_FREQUENCY)
{
CHECK(init_movit(MOVIT_SHADER_DIR, MOVIT_DEBUG_OFF));
check_error();
r128.init(2, OUTPUT_FREQUENCY);
r128.integr_start();
- locut.init(FILTER_HPF, 2);
-
- set_locut_enabled(global_flags.locut_enabled);
- set_gain_staging_db(global_flags.initial_gain_staging_db);
- set_gain_staging_auto(global_flags.gain_staging_auto);
- set_compressor_enabled(global_flags.compressor_enabled);
- set_limiter_enabled(global_flags.limiter_enabled);
- set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
-
// hlen=16 is pretty low quality, but we use quite a bit of CPU otherwise,
// and there's a limit to how important the peak meter is.
peak_resampler.setup(OUTPUT_FREQUENCY, OUTPUT_FREQUENCY * 4, /*num_channels=*/2, /*hlen=*/16, /*frel=*/1.0);
if (card->surface == nullptr) {
card->surface = create_surface_with_same_format(mixer_surface);
}
- {
- unique_lock<mutex> lock(cards[card_index].audio_mutex);
- card->resampling_queue.reset(new ResamplingQueue(card_index, OUTPUT_FREQUENCY, OUTPUT_FREQUENCY, 2));
- }
+ audio_mixer.reset_card(card_index);
while (!card->new_frames.empty()) card->new_frames.pop();
card->fractional_samples = 0;
card->last_timecode = -1;
- card->next_local_pts = 0;
card->capture->configure_card();
}
return;
}
- int64_t local_pts = card->next_local_pts;
int dropped_frames = 0;
if (card->last_timecode != -1) {
dropped_frames = unwrap_timecode(timecode, card->last_timecode) - card->last_timecode - 1;
}
- // Convert the audio to stereo fp32 and add it.
- vector<float> audio;
- audio.resize(num_samples * 2);
- switch (audio_format.bits_per_sample) {
- case 0:
- assert(num_samples == 0);
- break;
- case 24:
- convert_fixed24_to_fp32(&audio[0], 2, audio_frame.data + audio_offset, audio_format.num_channels, num_samples);
- break;
- case 32:
- convert_fixed32_to_fp32(&audio[0], 2, audio_frame.data + audio_offset, audio_format.num_channels, num_samples);
- break;
- default:
- fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
- assert(false);
- }
+ // Number of samples per frame if we need to insert silence.
+ // (Could be nonintegral, but resampling will save us then.)
+ const int silence_samples = OUTPUT_FREQUENCY * video_format.frame_rate_den / video_format.frame_rate_nom;
- // Add the audio.
- {
- unique_lock<mutex> lock(card->audio_mutex);
-
- // Number of samples per frame if we need to insert silence.
- // (Could be nonintegral, but resampling will save us then.)
- int silence_samples = OUTPUT_FREQUENCY * video_format.frame_rate_den / video_format.frame_rate_nom;
-
- if (dropped_frames > MAX_FPS * 2) {
- fprintf(stderr, "Card %d lost more than two seconds (or time code jumping around; from 0x%04x to 0x%04x), resetting resampler\n",
- card_index, card->last_timecode, timecode);
- card->resampling_queue.reset(new ResamplingQueue(card_index, OUTPUT_FREQUENCY, OUTPUT_FREQUENCY, 2));
- dropped_frames = 0;
- } else if (dropped_frames > 0) {
- // Insert silence as needed.
- fprintf(stderr, "Card %d dropped %d frame(s) (before timecode 0x%04x), inserting silence.\n",
- card_index, dropped_frames, timecode);
- vector<float> silence(silence_samples * 2, 0.0f);
- for (int i = 0; i < dropped_frames; ++i) {
- card->resampling_queue->add_input_samples(local_pts / double(TIMEBASE), silence.data(), silence_samples);
- // Note that if the format changed in the meantime, we have
- // no way of detecting that; we just have to assume the frame length
- // is always the same.
- local_pts += frame_length;
- }
- }
- if (num_samples == 0) {
- audio.resize(silence_samples * 2);
- num_samples = silence_samples;
- }
- card->resampling_queue->add_input_samples(local_pts / double(TIMEBASE), audio.data(), num_samples);
- card->next_local_pts = local_pts + frame_length;
+ if (dropped_frames > MAX_FPS * 2) {
+ fprintf(stderr, "Card %d lost more than two seconds (or time code jumping around; from 0x%04x to 0x%04x), resetting resampler\n",
+ card_index, card->last_timecode, timecode);
+ audio_mixer.reset_card(card_index);
+ dropped_frames = 0;
+ } else if (dropped_frames > 0) {
+ // Insert silence as needed.
+ fprintf(stderr, "Card %d dropped %d frame(s) (before timecode 0x%04x), inserting silence.\n",
+ card_index, dropped_frames, timecode);
+
+ audio_mixer.add_silence(card_index, silence_samples, dropped_frames, frame_length);
}
- card->last_timecode = timecode;
+ audio_mixer.add_audio(card_index, audio_frame.data + audio_offset, num_samples, audio_format, frame_length);
// Done with the audio, so release it.
if (audio_frame.owner) {
audio_frame.owner->release_frame(audio_frame);
}
+ card->last_timecode = timecode;
+
size_t expected_length = video_format.width * (video_format.height + video_format.extra_lines_top + video_format.extra_lines_bottom) * 2;
if (video_frame.len - video_offset == 0 ||
video_frame.len - video_offset != expected_length) {
return;
}
- unique_lock<mutex> lock(compressor_mutex);
+ unique_lock<mutex> lock(audio_measure_mutex);
double loudness_s = r128.loudness_S();
double loudness_i = r128.integrated();
double loudness_range_low = r128.range_min();
audio_level_callback(loudness_s, 20.0 * log10(peak),
loudness_i, loudness_range_low, loudness_range_high,
- gain_staging_db, 20.0 * log10(final_makeup_gain),
+ audio_mixer.get_gain_staging_db(),
+ audio_mixer.get_final_makeup_gain_db(),
correlation.get_correlation());
}
audio_task_queue.pop();
}
- process_audio_one_frame(task.pts_int, task.num_samples, task.adjust_rate);
+ ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy =
+ task.adjust_rate ? ResamplingQueue::ADJUST_RATE : ResamplingQueue::DO_NOT_ADJUST_RATE;
+ process_audio_one_frame(task.pts_int, task.num_samples, rate_adjustment_policy);
}
}
-void Mixer::process_audio_one_frame(int64_t frame_pts_int, int num_samples, bool adjust_rate)
+void Mixer::process_audio_one_frame(int64_t frame_pts_int, int num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
{
- vector<float> samples_card;
- vector<float> samples_out;
- samples_out.resize(num_samples * 2);
-
- // TODO: Allow more flexible input mapping.
- unsigned selected_audio_card = theme->map_signal(audio_source_channel);
- assert(selected_audio_card < num_cards);
-
- for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
- samples_card.resize(num_samples * 2);
- {
- unique_lock<mutex> lock(cards[card_index].audio_mutex);
- ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy =
- adjust_rate ? ResamplingQueue::ADJUST_RATE : ResamplingQueue::DO_NOT_ADJUST_RATE;
- cards[card_index].resampling_queue->get_output_samples(
- double(frame_pts_int) / TIMEBASE,
- &samples_card[0],
- num_samples,
- rate_adjustment_policy);
- }
- if (card_index == 0) {
- for (int i = 0; i < num_samples * 2; ++i) {
- samples_out[i] = samples_card[i];
- }
- } else {
- for (int i = 0; i < num_samples * 2; ++i) {
- samples_out[i] += samples_card[i];
- }
- }
- }
-
- // Cut away everything under 120 Hz (or whatever the cutoff is);
- // we don't need it for voice, and it will reduce headroom
- // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
- // should be dampened.)
- if (locut_enabled) {
- locut.render(samples_out.data(), samples_out.size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
- }
-
- // Apply a level compressor to get the general level right.
- // Basically, if it's over about -40 dBFS, we squeeze it down to that level
- // (or more precisely, near it, since we don't use infinite ratio),
- // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
- // entirely arbitrary, but from practical tests with speech, it seems to
- // put ut around -23 LUFS, so it's a reasonable starting point for later use.
- {
- unique_lock<mutex> lock(compressor_mutex);
- if (level_compressor_enabled) {
- float threshold = 0.01f; // -40 dBFS.
- float ratio = 20.0f;
- float attack_time = 0.5f;
- float release_time = 20.0f;
- float makeup_gain = pow(10.0f, (ref_level_dbfs - (-40.0f)) / 20.0f); // +26 dB.
- level_compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
- gain_staging_db = 20.0 * log10(level_compressor.get_attenuation() * makeup_gain);
- } else {
- // Just apply the gain we already had.
- float g = pow(10.0f, gain_staging_db / 20.0f);
- for (size_t i = 0; i < samples_out.size(); ++i) {
- samples_out[i] *= g;
- }
- }
- }
-
-#if 0
- printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
- level_compressor.get_level(), 20.0 * log10(level_compressor.get_level()),
- level_compressor.get_attenuation(), 20.0 * log10(level_compressor.get_attenuation()),
- 20.0 * log10(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
-#endif
-
-// float limiter_att, compressor_att;
-
- // The real compressor.
- if (compressor_enabled) {
- float threshold = pow(10.0f, compressor_threshold_dbfs / 20.0f);
- float ratio = 20.0f;
- float attack_time = 0.005f;
- float release_time = 0.040f;
- float makeup_gain = 2.0f; // +6 dB.
- compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
-// compressor_att = compressor.get_attenuation();
- }
-
- // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
- // Note that since ratio is not infinite, we could go slightly higher than this.
- if (limiter_enabled) {
- float threshold = pow(10.0f, limiter_threshold_dbfs / 20.0f);
- float ratio = 30.0f;
- float attack_time = 0.0f; // Instant.
- float release_time = 0.020f;
- float makeup_gain = 1.0f; // 0 dB.
- limiter.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
-// limiter_att = limiter.get_attenuation();
- }
-
-// printf("limiter=%+5.1f compressor=%+5.1f\n", 20.0*log10(limiter_att), 20.0*log10(compressor_att));
-
- // At this point, we are most likely close to +0 LU, but all of our
- // measurements have been on raw sample values, not R128 values.
- // So we have a final makeup gain to get us to +0 LU; the gain
- // adjustments required should be relatively small, and also, the
- // offset shouldn't change much (only if the type of audio changes
- // significantly). Thus, we shoot for updating this value basically
- // “whenever we process buffers”, since the R128 calculation isn't exactly
- // something we get out per-sample.
- //
- // Note that there's a feedback loop here, so we choose a very slow filter
- // (half-time of 100 seconds).
- double target_loudness_factor, alpha;
- {
- unique_lock<mutex> lock(compressor_mutex);
- double loudness_lu = r128.loudness_M() - ref_level_lufs;
- double current_makeup_lu = 20.0f * log10(final_makeup_gain);
- target_loudness_factor = pow(10.0f, -loudness_lu / 20.0f);
-
- // If we're outside +/- 5 LU uncorrected, we don't count it as
- // a normal signal (probably silence) and don't change the
- // correction factor; just apply what we already have.
- if (fabs(loudness_lu - current_makeup_lu) >= 5.0 || !final_makeup_gain_auto) {
- alpha = 0.0;
- } else {
- // Formula adapted from
- // https://en.wikipedia.org/wiki/Low-pass_filter#Simple_infinite_impulse_response_filter.
- const double half_time_s = 100.0;
- const double fc_mul_2pi_delta_t = 1.0 / (half_time_s * OUTPUT_FREQUENCY);
- alpha = fc_mul_2pi_delta_t / (fc_mul_2pi_delta_t + 1.0);
- }
-
- double m = final_makeup_gain;
- for (size_t i = 0; i < samples_out.size(); i += 2) {
- samples_out[i + 0] *= m;
- samples_out[i + 1] *= m;
- m += (target_loudness_factor - m) * alpha;
- }
- final_makeup_gain = m;
- }
+ vector<float> samples_out = audio_mixer.get_output(double(frame_pts_int) / TIMEBASE, num_samples, rate_adjustment_policy);
// Upsample 4x to find interpolated peak.
peak_resampler.inp_data = samples_out.data();
vector<float> interpolated_samples_out;
interpolated_samples_out.resize(samples_out.size());
- while (peak_resampler.inp_count > 0) { // About four iterations.
- peak_resampler.out_data = &interpolated_samples_out[0];
- peak_resampler.out_count = interpolated_samples_out.size() / 2;
- peak_resampler.process();
- size_t out_stereo_samples = interpolated_samples_out.size() / 2 - peak_resampler.out_count;
- peak = max<float>(peak, find_peak(interpolated_samples_out.data(), out_stereo_samples * 2));
- peak_resampler.out_data = nullptr;
+ {
+ unique_lock<mutex> lock(audio_measure_mutex);
+
+ while (peak_resampler.inp_count > 0) { // About four iterations.
+ peak_resampler.out_data = &interpolated_samples_out[0];
+ peak_resampler.out_count = interpolated_samples_out.size() / 2;
+ peak_resampler.process();
+ size_t out_stereo_samples = interpolated_samples_out.size() / 2 - peak_resampler.out_count;
+ peak = max<float>(peak, find_peak(interpolated_samples_out.data(), out_stereo_samples * 2));
+ peak_resampler.out_data = nullptr;
+ }
}
// Find R128 levels and L/R correlation.
deinterleave_samples(samples_out, &left, &right);
float *ptrs[] = { left.data(), right.data() };
{
- unique_lock<mutex> lock(compressor_mutex);
+ unique_lock<mutex> lock(audio_measure_mutex);
r128.process(left.size(), ptrs);
+ audio_mixer.set_current_loudness(r128.loudness_M());
correlation.process_samples(samples_out);
}
void Mixer::reset_meters()
{
+ unique_lock<mutex> lock(audio_measure_mutex);
peak_resampler.reset();
peak = 0.0f;
r128.reset();
#include "bmusb/bmusb.h"
#include "alsa_output.h"
+#include "audio_mixer.h"
#include "ebu_r128_proc.h"
#include "video_encoder.h"
#include "httpd.h"
theme->set_wb(channel, r, g, b);
}
- void set_locut_cutoff(float cutoff_hz)
- {
- locut_cutoff_hz = cutoff_hz;
- }
-
- void set_locut_enabled(bool enabled)
- {
- locut_enabled = enabled;
- }
-
- bool get_locut_enabled() const
- {
- return locut_enabled;
- }
-
- float get_limiter_threshold_dbfs()
- {
- return limiter_threshold_dbfs;
- }
-
- float get_compressor_threshold_dbfs()
- {
- return compressor_threshold_dbfs;
- }
-
- void set_limiter_threshold_dbfs(float threshold_dbfs)
- {
- limiter_threshold_dbfs = threshold_dbfs;
- }
-
- void set_compressor_threshold_dbfs(float threshold_dbfs)
- {
- compressor_threshold_dbfs = threshold_dbfs;
- }
-
- void set_limiter_enabled(bool enabled)
- {
- limiter_enabled = enabled;
- }
-
- bool get_limiter_enabled() const
- {
- return limiter_enabled;
- }
-
- void set_compressor_enabled(bool enabled)
- {
- compressor_enabled = enabled;
- }
-
- bool get_compressor_enabled() const
- {
- return compressor_enabled;
- }
-
- void set_gain_staging_db(float gain_db)
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- level_compressor_enabled = false;
- gain_staging_db = gain_db;
- }
-
- float get_gain_staging_db() const
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- return gain_staging_db;
- }
-
- void set_gain_staging_auto(bool enabled)
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- level_compressor_enabled = enabled;
- }
-
- bool get_gain_staging_auto() const
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- return level_compressor_enabled;
- }
-
- void set_final_makeup_gain_db(float gain_db)
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- final_makeup_gain_auto = false;
- final_makeup_gain = pow(10.0f, gain_db / 20.0f);
- }
-
- void set_final_makeup_gain_auto(bool enabled)
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- final_makeup_gain_auto = enabled;
- }
-
- bool get_final_makeup_gain_auto() const
- {
- std::unique_lock<std::mutex> lock(compressor_mutex);
- return final_makeup_gain_auto;
- }
+ AudioMixer *get_audio_mixer() { return &audio_mixer; }  // Hands out the address of the audio_mixer member (no copy is made). Presumably the replacement for the per-setting audio accessors this patch removes -- confirm against audio_mixer.h.
+ const AudioMixer *get_audio_mixer() const { return &audio_mixer; }  // Const overload: same member, returned as pointer-to-const.
void schedule_cut()
{
void render_one_frame(int64_t duration);
void send_audio_level_callback();
void audio_thread_func();
- void process_audio_one_frame(int64_t frame_pts_int, int num_samples, bool adjust_rate);
+ void process_audio_one_frame(int64_t frame_pts_int, int num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy);  // frame_pts_int is divided by TIMEBASE and passed, together with num_samples and rate_adjustment_policy, to audio_mixer.get_output().
void subsample_chroma(GLuint src_tex, GLuint dst_dst);
void release_display_frame(DisplayFrame *frame);
double pts() { return double(pts_int) / TIMEBASE; }
// frame rate is integer, will always stay zero.
unsigned fractional_samples = 0;
- std::mutex audio_mutex;
- std::unique_ptr<ResamplingQueue> resampling_queue; // Under audio_mutex.
int last_timecode = -1; // Unwrapped.
- int64_t next_local_pts = 0; // Beginning of next frame, in TIMEBASE units.
};
CaptureCard cards[MAX_CARDS]; // protected by <bmusb_mutex>
+ AudioMixer audio_mixer;  // Held by value; its address is what get_audio_mixer() returns.
void get_one_frame_from_each_card(unsigned master_card_index, CaptureCard::NewFrame new_frames[MAX_CARDS], bool has_new_frame[MAX_CARDS], int num_samples[MAX_CARDS]);
InputState input_state;
std::atomic<bool> should_cut{false};
audio_level_callback_t audio_level_callback = nullptr;
- mutable std::mutex compressor_mutex;
- Ebu_r128_proc r128; // Under compressor_mutex.
- CorrelationMeasurer correlation; // Under compressor_mutex.
-
- Resampler peak_resampler;
+ mutable std::mutex audio_measure_mutex;  // Guards the meter state marked "Under audio_measure_mutex" below; also taken by reset_meters().
+ Ebu_r128_proc r128; // Under audio_measure_mutex.
+ CorrelationMeasurer correlation; // Under audio_measure_mutex.
+ Resampler peak_resampler; // Under audio_measure_mutex.
std::atomic<float> peak{0.0f};
- StereoFilter locut; // Default cutoff 120 Hz, 24 dB/oct.
- std::atomic<float> locut_cutoff_hz;
- std::atomic<bool> locut_enabled{true};
-
- // First compressor; takes us up to about -12 dBFS.
- StereoCompressor level_compressor; // Under compressor_mutex. Used to set/override gain_staging_db if <level_compressor_enabled>.
- float gain_staging_db = 0.0f; // Under compressor_mutex.
- bool level_compressor_enabled = true; // Under compressor_mutex.
-
- static constexpr float ref_level_dbfs = -14.0f; // Chosen so that we end up around 0 LU in practice.
- static constexpr float ref_level_lufs = -23.0f; // 0 LU, more or less by definition.
-
- StereoCompressor limiter;
- std::atomic<float> limiter_threshold_dbfs{ref_level_dbfs + 4.0f}; // 4 dB.
- std::atomic<bool> limiter_enabled{true};
- StereoCompressor compressor;
- std::atomic<float> compressor_threshold_dbfs{ref_level_dbfs - 12.0f}; // -12 dB.
- std::atomic<bool> compressor_enabled{true};
-
- double final_makeup_gain = 1.0; // Under compressor_mutex. Read/write by the user. Note: Not in dB, we want the numeric precision so that we can change it slowly.
- bool final_makeup_gain_auto = true; // Under compressor_mutex.
-
std::unique_ptr<ALSAOutput> alsa;
struct AudioTask {