Add a benchmark for a simple audio chain; easier than profiling all of Nageru.

[nageru] / audio_mixer.cpp
diff --git a/audio_mixer.cpp b/audio_mixer.cpp

index 1c570fa870468e54134ac892c5eac016372754ce..8d09ad3bfa9a279f5d74af38613503ce835311e8 100644 (file)
--- a/audio_mixer.cpp
+++ b/audio_mixer.cpp
@@ -77,20 +77,47 @@ void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_chan
         }
  }
  
+// Returns the largest absolute value among the first <num_samples> floats
+// in <samples>, or 0.0 if <num_samples> is zero.
+float find_peak(const float *samples, size_t num_samples)
+{
+       // An empty buffer has no samples[0] to read; report silence instead
+       // of reading out of bounds.
+       if (num_samples == 0) {
+               return 0.0f;
+       }
+
+       float m = fabs(samples[0]);
+       for (size_t i = 1; i < num_samples; ++i) {
+               m = max(m, fabs(samples[i]));
+       }
+       return m;
+}
+
+// Splits interleaved two-channel samples (first, second, first, second, ...)
+// from <in> into <out_l> (even indices) and <out_r> (odd indices).
+// Both outputs are resized to in.size() / 2 elements; a trailing unpaired
+// sample in <in> is ignored.
+void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<float> *out_r)
+{
+       size_t num_samples = in.size() / 2;
+       out_l->resize(num_samples);
+       out_r->resize(num_samples);
+
+       // Use data() rather than &v[0]: indexing element 0 of an empty vector
+       // is undefined behavior, and the outputs are empty whenever <in> holds
+       // fewer than two samples.
+       const float *inptr = in.data();
+       float *lptr = out_l->data();
+       float *rptr = out_r->data();
+       for (size_t i = 0; i < num_samples; ++i) {
+               *lptr++ = *inptr++;
+               *rptr++ = *inptr++;
+       }
+}
+
  }  // namespace
  
  AudioMixer::AudioMixer(unsigned num_cards)
         : num_cards(num_cards),
-         level_compressor(OUTPUT_FREQUENCY),
           limiter(OUTPUT_FREQUENCY),
-         compressor(OUTPUT_FREQUENCY)
+         correlation(OUTPUT_FREQUENCY)
  {
-       locut.init(FILTER_HPF, 2);
-
-       set_locut_enabled(global_flags.locut_enabled);
-       set_gain_staging_db(global_flags.initial_gain_staging_db);
-       set_gain_staging_auto(global_flags.gain_staging_auto);
-       set_compressor_enabled(global_flags.compressor_enabled);
+       for (unsigned bus_index = 0; bus_index < MAX_BUSES; ++bus_index) {
+               locut[bus_index].init(FILTER_HPF, 2);
+               locut_enabled[bus_index] = global_flags.locut_enabled;
+               gain_staging_db[bus_index] = global_flags.initial_gain_staging_db;
+               compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
+               compressor_threshold_dbfs[bus_index] = ref_level_dbfs - 12.0f;  // -12 dB.
+               compressor_enabled[bus_index] = global_flags.compressor_enabled;
+               level_compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
+               level_compressor_enabled[bus_index] = global_flags.gain_staging_auto;
+       }
         set_limiter_enabled(global_flags.limiter_enabled);
         set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
  
@@ -108,6 +135,13 @@ AudioMixer::AudioMixer(unsigned num_cards)
  
         // Look for ALSA cards.
         available_alsa_cards = ALSAInput::enumerate_devices();
+
+       r128.init(2, OUTPUT_FREQUENCY);
+       r128.integr_start();
+
+       // hlen=16 is pretty low quality, but we use quite a bit of CPU otherwise,
+       // and there's a limit to how important the peak meter is.
+       peak_resampler.setup(OUTPUT_FREQUENCY, OUTPUT_FREQUENCY * 4, /*num_channels=*/2, /*hlen=*/16, /*frel=*/1.0);
  }
  
  AudioMixer::~AudioMixer()
@@ -153,7 +187,8 @@ void AudioMixer::reset_alsa_mutex_held(DeviceSpec device_spec)
         if (device->interesting_channels.empty()) {
                 device->alsa_device.reset();
         } else {
-               device->alsa_device.reset(new ALSAInput(available_alsa_cards[card_index].address.c_str(), OUTPUT_FREQUENCY, 2, bind(&AudioMixer::add_audio, this, device_spec, _1, _2, _3, _4)));
+               const ALSAInput::Device &alsa_dev = available_alsa_cards[card_index];
+               device->alsa_device.reset(new ALSAInput(alsa_dev.address.c_str(), OUTPUT_FREQUENCY, alsa_dev.num_channels, bind(&AudioMixer::add_audio, this, device_spec, _1, _2, _3, _4)));
                 device->capture_frequency = device->alsa_device->get_sample_rate();
                 device->alsa_device->start_capture_thread();
         }
@@ -317,79 +352,82 @@ vector<float> AudioMixer::get_output(double pts, unsigned num_samples, Resamplin
                 }
         }
  
-       // TODO: Move lo-cut etc. into each bus.
-       vector<float> samples_out;
+       vector<float> samples_out, left, right;
         samples_out.resize(num_samples * 2);
         samples_bus.resize(num_samples * 2);
         for (unsigned bus_index = 0; bus_index < input_mapping.buses.size(); ++bus_index) {
                 fill_audio_bus(samples_card, input_mapping.buses[bus_index], num_samples, &samples_bus[0]);
  
-               float volume = from_db(fader_volume_db[bus_index]);
-               if (bus_index == 0) {
-                       for (unsigned i = 0; i < num_samples * 2; ++i) {
-                               samples_out[i] = samples_bus[i] * volume;
-                       }
-               } else {
-                       for (unsigned i = 0; i < num_samples * 2; ++i) {
-                               samples_out[i] += samples_bus[i] * volume;
-                       }
+               // Cut away everything under 120 Hz (or whatever the cutoff is);
+               // we don't need it for voice, and it will reduce headroom
+               // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
+               // should be dampened.)
+               if (locut_enabled[bus_index]) {
+                       locut[bus_index].render(samples_bus.data(), samples_bus.size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
                 }
-       }
-
-       // Cut away everything under 120 Hz (or whatever the cutoff is);
-       // we don't need it for voice, and it will reduce headroom
-       // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
-       // should be dampened.)
-       if (locut_enabled) {
-               locut.render(samples_out.data(), samples_out.size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
-       }
  
-       {
-               lock_guard<mutex> lock(compressor_mutex);
-
-               // Apply a level compressor to get the general level right.
-               // Basically, if it's over about -40 dBFS, we squeeze it down to that level
-               // (or more precisely, near it, since we don't use infinite ratio),
-               // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
-               // entirely arbitrary, but from practical tests with speech, it seems to
-               // put ut around -23 LUFS, so it's a reasonable starting point for later use.
                 {
-                       if (level_compressor_enabled) {
+                       lock_guard<mutex> lock(compressor_mutex);
+
+                       // Apply a level compressor to get the general level right.
+                       // Basically, if it's over about -40 dBFS, we squeeze it down to that level
+                       // (or more precisely, near it, since we don't use infinite ratio),
+                       // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
+                       // entirely arbitrary, but from practical tests with speech, it seems to
+                       // put us around -23 LUFS, so it's a reasonable starting point for later use.
+                       if (level_compressor_enabled[bus_index]) {
                                 float threshold = 0.01f;   // -40 dBFS.
                                 float ratio = 20.0f;
                                 float attack_time = 0.5f;
                                 float release_time = 20.0f;
                                 float makeup_gain = from_db(ref_level_dbfs - (-40.0f));  // +26 dB.
-                               level_compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
-                               gain_staging_db = to_db(level_compressor.get_attenuation() * makeup_gain);
+                               level_compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+                               gain_staging_db[bus_index] = to_db(level_compressor[bus_index]->get_attenuation() * makeup_gain);
                         } else {
                                 // Just apply the gain we already had.
-                               float g = from_db(gain_staging_db);
-                               for (size_t i = 0; i < samples_out.size(); ++i) {
-                                       samples_out[i] *= g;
+                               float g = from_db(gain_staging_db[bus_index]);
+                               for (size_t i = 0; i < samples_bus.size(); ++i) {
+                                       samples_bus[i] *= g;
                                 }
                         }
+
+#if 0
+                       printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
+                               level_compressor.get_level(), to_db(level_compressor.get_level()),
+                               level_compressor.get_attenuation(), to_db(level_compressor.get_attenuation()),
+                               to_db(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
+#endif
+
+                       // The real compressor.
+                       if (compressor_enabled[bus_index]) {
+                               float threshold = from_db(compressor_threshold_dbfs[bus_index]);
+                               float ratio = 20.0f;
+                               float attack_time = 0.005f;
+                               float release_time = 0.040f;
+                               float makeup_gain = 2.0f;  // +6 dB.
+                               compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+               //              compressor_att = compressor.get_attenuation();
+                       }
                 }
  
-       #if 0
-               printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
-                       level_compressor.get_level(), to_db(level_compressor.get_level()),
-                       level_compressor.get_attenuation(), to_db(level_compressor.get_attenuation()),
-                       to_db(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
-       #endif
-
-       //      float limiter_att, compressor_att;
-
-               // The real compressor.
-               if (compressor_enabled) {
-                       float threshold = from_db(compressor_threshold_dbfs);
-                       float ratio = 20.0f;
-                       float attack_time = 0.005f;
-                       float release_time = 0.040f;
-                       float makeup_gain = 2.0f;  // +6 dB.
-                       compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
-       //              compressor_att = compressor.get_attenuation();
+               // TODO: We should measure post-fader.
+               deinterleave_samples(samples_bus, &left, &right);
+               measure_bus_levels(bus_index, left, right);
+
+               float volume = from_db(fader_volume_db[bus_index]);
+               if (bus_index == 0) {
+                       for (unsigned i = 0; i < num_samples * 2; ++i) {
+                               samples_out[i] = samples_bus[i] * volume;
+                       }
+               } else {
+                       for (unsigned i = 0; i < num_samples * 2; ++i) {
+                               samples_out[i] += samples_bus[i] * volume;
+                       }
                 }
+       }
+
+       {
+               lock_guard<mutex> lock(compressor_mutex);
  
                 // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
                 // Note that since ratio is not infinite, we could go slightly higher than this.
@@ -406,7 +444,8 @@ vector<float> AudioMixer::get_output(double pts, unsigned num_samples, Resamplin
         //      printf("limiter=%+5.1f  compressor=%+5.1f\n", to_db(limiter_att), to_db(compressor_att));
         }
  
-       // At this point, we are most likely close to +0 LU, but all of our
+       // At this point, we are most likely close to +0 LU (at least if the
+       // faders sum to 0 dB and the compressors are on), but all of our
         // measurements have been on raw sample values, not R128 values.
         // So we have a final makeup gain to get us to +0 LU; the gain
         // adjustments required should be relatively small, and also, the
@@ -418,7 +457,7 @@ vector<float> AudioMixer::get_output(double pts, unsigned num_samples, Resamplin
         // Note that there's a feedback loop here, so we choose a very slow filter
         // (half-time of 30 seconds).
         double target_loudness_factor, alpha;
-       double loudness_lu = loudness_lufs - ref_level_lufs;
+       double loudness_lu = r128.loudness_M() - ref_level_lufs;
         double current_makeup_lu = to_db(final_makeup_gain);
         target_loudness_factor = final_makeup_gain * from_db(-loudness_lu);
  
@@ -446,9 +485,97 @@ vector<float> AudioMixer::get_output(double pts, unsigned num_samples, Resamplin
                 final_makeup_gain = m;
         }
  
+       update_meters(samples_out);
+
         return samples_out;
  }
  
+// Runs one block of already deinterleaved samples for bus <bus_index>
+// through that bus's entry in bus_r128. <left> and <right> are passed as
+// the two channels; the sample count is taken from <left>.
+void AudioMixer::measure_bus_levels(unsigned bus_index, const vector<float> &left, const vector<float> &right)
+{
+       const float *channel_ptrs[2];
+       channel_ptrs[0] = left.data();
+       channel_ptrs[1] = right.data();
+
+       // process() wants float ** even though we only hand it read-only
+       // buffers, hence the const_cast.
+       lock_guard<mutex> lock(audio_measure_mutex);
+       bus_r128[bus_index]->process(left.size(), const_cast<float **>(channel_ptrs));
+}
+
+// Feeds one block of interleaved stereo output samples into the master
+// meters: the 4x-upsampled peak detector, the R128 meter and the L/R
+// correlation meter. Afterwards, reports the new readings through
+// send_audio_level_callback().
+void AudioMixer::update_meters(const vector<float> &samples)
+{
+       vector<float> interpolated_samples;
+       interpolated_samples.resize(samples.size());
+       {
+               lock_guard<mutex> lock(audio_measure_mutex);
+
+               // Upsample 4x to find interpolated peak.
+               //
+               // The input pointers are set up under the lock, too;
+               // reset_meters() resets peak_resampler with this mutex held,
+               // so writing to the resampler outside of it would race.
+               peak_resampler.inp_data = const_cast<float *>(samples.data());
+               peak_resampler.inp_count = samples.size() / 2;
+
+               // The output buffer holds as many frames as the input block,
+               // so it is refilled repeatedly until all input is consumed.
+               while (peak_resampler.inp_count > 0) {  // About four iterations.
+                       peak_resampler.out_data = &interpolated_samples[0];
+                       peak_resampler.out_count = interpolated_samples.size() / 2;
+                       peak_resampler.process();
+                       size_t out_stereo_samples = interpolated_samples.size() / 2 - peak_resampler.out_count;
+                       peak = max<float>(peak, find_peak(interpolated_samples.data(), out_stereo_samples * 2));
+                       peak_resampler.out_data = nullptr;
+               }
+       }
+
+       // Find R128 levels and L/R correlation.
+       vector<float> left, right;
+       deinterleave_samples(samples, &left, &right);
+       float *ptrs[] = { left.data(), right.data() };
+       {
+               lock_guard<mutex> lock(audio_measure_mutex);
+               r128.process(left.size(), ptrs);
+               correlation.process_samples(samples);
+       }
+
+       // Called with audio_measure_mutex released, since the callee
+       // takes that mutex itself.
+       send_audio_level_callback();
+}
+
+// Clears the state of all master meters (held peak, peak resampler,
+// L/R correlation and R128), all under audio_measure_mutex.
+void AudioMixer::reset_meters()
+{
+       lock_guard<mutex> lock(audio_measure_mutex);
+
+       // Peak metering: forget the held peak and the resampler's state.
+       peak = 0.0f;
+       peak_resampler.reset();
+
+       // Stereo correlation.
+       correlation.reset();
+
+       // R128: reset, then start integrating again, as the constructor
+       // does right after init().
+       r128.reset();
+       r128.integr_start();
+}
+
+// Collects the current master and per-bus meter readings and passes them
+// to audio_level_callback; does nothing if no callback is set.
+//
+// Takes audio_measure_mutex and, nested inside it, compressor_mutex, so the
+// caller must hold neither. Note that audio_measure_mutex is still held
+// while the callback itself runs.
+void AudioMixer::send_audio_level_callback()
+{
+       if (audio_level_callback == nullptr) {
+               return;
+       }
+
+       lock_guard<mutex> lock(audio_measure_mutex);
+       double loudness_s = r128.loudness_S();
+       double loudness_i = r128.integrated();
+       double loudness_range_low = r128.range_min();
+       double loudness_range_high = r128.range_max();
+
+       vector<BusLevel> bus_levels;
+       bus_levels.resize(input_mapping.buses.size());
+       {
+               lock_guard<mutex> compressor_lock(compressor_mutex);
+
+               // bus_levels is sized from input_mapping, while the meters live
+               // in bus_r128. set_input_mapping() resizes bus_r128 before it
+               // assigns input_mapping, so only walk the buses present in both
+               // to avoid writing past the end of bus_levels.
+               // NOTE(review): whether the two can actually be observed out of
+               // sync depends on locking in the callers -- confirm.
+               const size_t num_buses = min(bus_levels.size(), bus_r128.size());
+               for (unsigned bus_index = 0; bus_index < num_buses; ++bus_index) {
+                       bus_levels[bus_index].loudness_lufs = bus_r128[bus_index]->loudness_S();
+                       bus_levels[bus_index].gain_staging_db = gain_staging_db[bus_index];
+                       if (compressor_enabled[bus_index]) {
+                               // Negated, so that the attenuation is reported as the
+                               // negative of the compressor's gain in dB.
+                               bus_levels[bus_index].compressor_attenuation_db = -to_db(compressor[bus_index]->get_attenuation());
+                       } else {
+                               bus_levels[bus_index].compressor_attenuation_db = 0.0;
+                       }
+               }
+       }
+
+       audio_level_callback(loudness_s, to_db(peak), bus_levels,
+               loudness_i, loudness_range_low, loudness_range_high,
+               to_db(final_makeup_gain),
+               correlation.get_correlation());
+}
+
  map<DeviceSpec, DeviceInfo> AudioMixer::get_devices() const
  {
         lock_guard<timed_mutex> lock(audio_mutex);
@@ -514,6 +641,17 @@ void AudioMixer::set_input_mapping(const InputMapping &new_input_mapping)
                 }
         }
  
+       {
+               lock_guard<mutex> lock(audio_measure_mutex);
+               bus_r128.resize(new_input_mapping.buses.size());
+               for (unsigned bus_index = 0; bus_index < bus_r128.size(); ++bus_index) {
+                       if (bus_r128[bus_index] == nullptr) {
+                               bus_r128[bus_index].reset(new Ebu_r128_proc);
+                       }
+                       bus_r128[bus_index]->init(2, OUTPUT_FREQUENCY);
+               }
+       }
+
         input_mapping = new_input_mapping;
  }