git.sesse.net Git - nageru/blob - audio_mixer.cpp

   1 #include "audio_mixer.h"
   2
   3 #include <assert.h>
   4 #include <bmusb/bmusb.h>
   5 #include <endian.h>
   6 #include <math.h>
   7 #ifdef __SSE2__
   8 #include <immintrin.h>
   9 #endif
  10 #include <stdbool.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <algorithm>
  15 #include <chrono>
  16 #include <cmath>
  17 #include <cstddef>
  18 #include <limits>
  19 #include <utility>
  20
  21 #include "db.h"
  22 #include "flags.h"
  23 #include "state.pb.h"
  24 #include "timebase.h"
  25
  26 using namespace bmusb;
  27 using namespace std;
  28 using namespace std::chrono;
  29 using namespace std::placeholders;
  30
  31 namespace {
  32
  33 // TODO: If these prove to be a bottleneck, they can be SSSE3-optimized
  34 // (usually including multiple channels at a time).
  35
  36 void convert_fixed16_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  37                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  38                              size_t num_samples)
  39 {
  40         assert(in_channel < in_num_channels);
  41         assert(out_channel < out_num_channels);
  42         src += in_channel * 2;
  43         dst += out_channel;
  44
  45         for (size_t i = 0; i < num_samples; ++i) {
  46                 int16_t s = le16toh(*(int16_t *)src);
  47                 *dst = s * (1.0f / 32768.0f);
  48
  49                 src += 2 * in_num_channels;
  50                 dst += out_num_channels;
  51         }
  52 }
  53
  54 void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  55                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  56                              size_t num_samples)
  57 {
  58         assert(in_channel < in_num_channels);
  59         assert(out_channel < out_num_channels);
  60         src += in_channel * 3;
  61         dst += out_channel;
  62
  63         for (size_t i = 0; i < num_samples; ++i) {
  64                 uint32_t s1 = src[0];
  65                 uint32_t s2 = src[1];
  66                 uint32_t s3 = src[2];
  67                 uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
  68                 *dst = int(s) * (1.0f / 2147483648.0f);
  69
  70                 src += 3 * in_num_channels;
  71                 dst += out_num_channels;
  72         }
  73 }
  74
  75 void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  76                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  77                              size_t num_samples)
  78 {
  79         assert(in_channel < in_num_channels);
  80         assert(out_channel < out_num_channels);
  81         src += in_channel * 4;
  82         dst += out_channel;
  83
  84         for (size_t i = 0; i < num_samples; ++i) {
  85                 int32_t s = le32toh(*(int32_t *)src);
  86                 *dst = s * (1.0f / 2147483648.0f);
  87
  88                 src += 4 * in_num_channels;
  89                 dst += out_num_channels;
  90         }
  91 }
  92
  93 float find_peak_plain(const float *samples, size_t num_samples) __attribute__((unused));
  94
  95 float find_peak_plain(const float *samples, size_t num_samples)
  96 {
  97         float m = fabs(samples[0]);
  98         for (size_t i = 1; i < num_samples; ++i) {
  99                 m = max(m, fabs(samples[i]));
 100         }
 101         return m;
 102 }
 103
 104 #ifdef __SSE__
 105 static inline float horizontal_max(__m128 m)
 106 {
 107         __m128 tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 0, 3, 2));
 108         m = _mm_max_ps(m, tmp);
 109         tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 3, 0, 1));
 110         m = _mm_max_ps(m, tmp);
 111         return _mm_cvtss_f32(m);
 112 }
 113
 114 float find_peak(const float *samples, size_t num_samples)
 115 {
 116         const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu));
 117         __m128 m = _mm_setzero_ps();
 118         for (size_t i = 0; i < (num_samples & ~3); i += 4) {
 119                 __m128 x = _mm_loadu_ps(samples + i);
 120                 x = _mm_and_ps(x, abs_mask);
 121                 m = _mm_max_ps(m, x);
 122         }
 123         float result = horizontal_max(m);
 124
 125         for (size_t i = (num_samples & ~3); i < num_samples; ++i) {
 126                 result = max(result, fabs(samples[i]));
 127         }
 128
 129 #if 0
 130         // Self-test. We should be bit-exact the same.
 131         float reference_result = find_peak_plain(samples, num_samples);
 132         if (result != reference_result) {
 133                 fprintf(stderr, "Error: Peak is %f [%f %f %f %f]; should be %f.\n",
 134                         result,
 135                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(0, 0, 0, 0))),
 136                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))),
 137                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))),
 138                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))),
 139                         reference_result);
 140                 abort();
 141         }
 142 #endif
 143         return result;
 144 }
 145 #else
 146 float find_peak(const float *samples, size_t num_samples)
 147 {
 148         return find_peak_plain(samples, num_samples);
 149 }
 150 #endif
 151
 152 void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<float> *out_r)
 153 {
 154         size_t num_samples = in.size() / 2;
 155         out_l->resize(num_samples);
 156         out_r->resize(num_samples);
 157
 158         const float *inptr = in.data();
 159         float *lptr = &(*out_l)[0];
 160         float *rptr = &(*out_r)[0];
 161         for (size_t i = 0; i < num_samples; ++i) {
 162                 *lptr++ = *inptr++;
 163                 *rptr++ = *inptr++;
 164         }
 165 }
 166
 167 }  // namespace
 168
 169 AudioMixer::AudioMixer(unsigned num_cards)
 170         : num_cards(num_cards),
 171           limiter(OUTPUT_FREQUENCY),
 172           correlation(OUTPUT_FREQUENCY)
 173 {
 174         for (unsigned bus_index = 0; bus_index < MAX_BUSES; ++bus_index) {
 175                 locut[bus_index].init(FILTER_HPF, 2);
 176                 eq[bus_index][EQ_BAND_BASS].init(FILTER_LOW_SHELF, 1);
 177                 // Note: EQ_BAND_MID isn't used (see comments in apply_eq()).
 178                 eq[bus_index][EQ_BAND_TREBLE].init(FILTER_HIGH_SHELF, 1);
 179                 compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
 180                 level_compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
 181
 182                 set_bus_settings(bus_index, get_default_bus_settings());
 183         }
 184         set_limiter_enabled(global_flags.limiter_enabled);
 185         set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
 186
 187         if (!global_flags.input_mapping_filename.empty()) {
 188                 current_mapping_mode = MappingMode::MULTICHANNEL;
 189                 InputMapping new_input_mapping;
 190                 if (!load_input_mapping_from_file(get_devices(),
 191                                                   global_flags.input_mapping_filename,
 192                                                   &new_input_mapping)) {
 193                         fprintf(stderr, "Failed to load input mapping from '%s', exiting.\n",
 194                                 global_flags.input_mapping_filename.c_str());
 195                         exit(1);
 196                 }
 197                 set_input_mapping(new_input_mapping);
 198         } else {
 199                 set_simple_input(/*card_index=*/0);
 200                 if (global_flags.multichannel_mapping_mode) {
 201                         current_mapping_mode = MappingMode::MULTICHANNEL;
 202                 }
 203         }
 204
 205         r128.init(2, OUTPUT_FREQUENCY);
 206         r128.integr_start();
 207
 208         // hlen=16 is pretty low quality, but we use quite a bit of CPU otherwise,
 209         // and there's a limit to how important the peak meter is.
 210         peak_resampler.setup(OUTPUT_FREQUENCY, OUTPUT_FREQUENCY * 4, /*num_channels=*/2, /*hlen=*/16, /*frel=*/1.0);
 211
 212         global_audio_mixer = this;
 213         alsa_pool.init();
 214 }
 215
 216 void AudioMixer::reset_resampler(DeviceSpec device_spec)
 217 {
 218         lock_guard<timed_mutex> lock(audio_mutex);
 219         reset_resampler_mutex_held(device_spec);
 220 }
 221
 222 void AudioMixer::reset_resampler_mutex_held(DeviceSpec device_spec)
 223 {
 224         AudioDevice *device = find_audio_device(device_spec);
 225
 226         if (device->interesting_channels.empty()) {
 227                 device->resampling_queue.reset();
 228         } else {
 229                 // TODO: ResamplingQueue should probably take the full device spec.
 230                 // (It's only used for console output, though.)
 231                 device->resampling_queue.reset(new ResamplingQueue(
 232                         device_spec.index, device->capture_frequency, OUTPUT_FREQUENCY, device->interesting_channels.size(),
 233                         global_flags.audio_queue_length_ms * 0.001));
 234         }
 235 }
 236
 237 bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, int64_t frame_length, steady_clock::time_point frame_time)
 238 {
 239         AudioDevice *device = find_audio_device(device_spec);
 240
 241         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 242         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 243                 return false;
 244         }
 245         if (device->resampling_queue == nullptr) {
 246                 // No buses use this device; throw it away.
 247                 return true;
 248         }
 249
 250         unsigned num_channels = device->interesting_channels.size();
 251         assert(num_channels > 0);
 252
 253         // Convert the audio to fp32.
 254         unique_ptr<float[]> audio(new float[num_samples * num_channels]);
 255         unsigned channel_index = 0;
 256         for (auto channel_it = device->interesting_channels.cbegin(); channel_it != device->interesting_channels.end(); ++channel_it, ++channel_index) {
 257                 switch (audio_format.bits_per_sample) {
 258                 case 0:
 259                         assert(num_samples == 0);
 260                         break;
 261                 case 16:
 262                         convert_fixed16_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 263                         break;
 264                 case 24:
 265                         convert_fixed24_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 266                         break;
 267                 case 32:
 268                         convert_fixed32_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 269                         break;
 270                 default:
 271                         fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
 272                         assert(false);
 273                 }
 274         }
 275
 276         // Now add it.
 277         device->resampling_queue->add_input_samples(frame_time, audio.get(), num_samples, ResamplingQueue::ADJUST_RATE);
 278         return true;
 279 }
 280
 281 bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length)
 282 {
 283         AudioDevice *device = find_audio_device(device_spec);
 284
 285         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 286         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 287                 return false;
 288         }
 289         if (device->resampling_queue == nullptr) {
 290                 // No buses use this device; throw it away.
 291                 return true;
 292         }
 293
 294         unsigned num_channels = device->interesting_channels.size();
 295         assert(num_channels > 0);
 296
 297         vector<float> silence(samples_per_frame * num_channels, 0.0f);
 298         for (unsigned i = 0; i < num_frames; ++i) {
 299                 device->resampling_queue->add_input_samples(steady_clock::now(), silence.data(), samples_per_frame, ResamplingQueue::DO_NOT_ADJUST_RATE);
 300         }
 301         return true;
 302 }
 303
 304 bool AudioMixer::silence_card(DeviceSpec device_spec, bool silence)
 305 {
 306         AudioDevice *device = find_audio_device(device_spec);
 307
 308         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 309         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 310                 return false;
 311         }
 312
 313         if (device->silenced && !silence) {
 314                 reset_resampler_mutex_held(device_spec);
 315         }
 316         device->silenced = silence;
 317         return true;
 318 }
 319
 320 AudioMixer::BusSettings AudioMixer::get_default_bus_settings()
 321 {
 322         BusSettings settings;
 323         settings.fader_volume_db = 0.0f;
 324         settings.muted = false;
 325         settings.locut_enabled = global_flags.locut_enabled;
 326         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 327                 settings.eq_level_db[band_index] = 0.0f;
 328         }
 329         settings.gain_staging_db = global_flags.initial_gain_staging_db;
 330         settings.level_compressor_enabled = global_flags.gain_staging_auto;
 331         settings.compressor_threshold_dbfs = ref_level_dbfs - 12.0f;  // -12 dB.
 332         settings.compressor_enabled = global_flags.compressor_enabled;
 333         return settings;
 334 }
 335
 336 AudioMixer::BusSettings AudioMixer::get_bus_settings(unsigned bus_index) const
 337 {
 338         lock_guard<timed_mutex> lock(audio_mutex);
 339         BusSettings settings;
 340         settings.fader_volume_db = fader_volume_db[bus_index];
 341         settings.muted = mute[bus_index];
 342         settings.locut_enabled = locut_enabled[bus_index];
 343         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 344                 settings.eq_level_db[band_index] = eq_level_db[bus_index][band_index];
 345         }
 346         settings.gain_staging_db = gain_staging_db[bus_index];
 347         settings.level_compressor_enabled = level_compressor_enabled[bus_index];
 348         settings.compressor_threshold_dbfs = compressor_threshold_dbfs[bus_index];
 349         settings.compressor_enabled = compressor_enabled[bus_index];
 350         return settings;
 351 }
 352
 353 void AudioMixer::set_bus_settings(unsigned bus_index, const AudioMixer::BusSettings &settings)
 354 {
 355         lock_guard<timed_mutex> lock(audio_mutex);
 356         fader_volume_db[bus_index] = settings.fader_volume_db;
 357         mute[bus_index] = settings.muted;
 358         locut_enabled[bus_index] = settings.locut_enabled;
 359         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 360                 eq_level_db[bus_index][band_index] = settings.eq_level_db[band_index];
 361         }
 362         gain_staging_db[bus_index] = settings.gain_staging_db;
 363         last_gain_staging_db[bus_index] = gain_staging_db[bus_index];
 364         level_compressor_enabled[bus_index] = settings.level_compressor_enabled;
 365         compressor_threshold_dbfs[bus_index] = settings.compressor_threshold_dbfs;
 366         compressor_enabled[bus_index] = settings.compressor_enabled;
 367 }
 368
 369 AudioMixer::AudioDevice *AudioMixer::find_audio_device(DeviceSpec device)
 370 {
 371         switch (device.type) {
 372         case InputSourceType::CAPTURE_CARD:
 373                 return &video_cards[device.index];
 374         case InputSourceType::ALSA_INPUT:
 375                 return &alsa_inputs[device.index];
 376         case InputSourceType::SILENCE:
 377         default:
 378                 assert(false);
 379         }
 380         return nullptr;
 381 }
 382
 383 // Get a pointer to the given channel from the given device.
 384 // The channel must be picked out earlier and resampled.
 385 void AudioMixer::find_sample_src_from_device(const map<DeviceSpec, vector<float>> &samples_card, DeviceSpec device_spec, int source_channel, const float **srcptr, unsigned *stride)
 386 {
 387         static float zero = 0.0f;
 388         if (source_channel == -1 || device_spec.type == InputSourceType::SILENCE) {
 389                 *srcptr = &zero;
 390                 *stride = 0;
 391                 return;
 392         }
 393         AudioDevice *device = find_audio_device(device_spec);
 394         assert(device->interesting_channels.count(source_channel) != 0);
 395         unsigned channel_index = 0;
 396         for (int channel : device->interesting_channels) {
 397                 if (channel == source_channel) break;
 398                 ++channel_index;
 399         }
 400         assert(channel_index < device->interesting_channels.size());
 401         const auto it = samples_card.find(device_spec);
 402         assert(it != samples_card.end());
 403         *srcptr = &(it->second)[channel_index];
 404         *stride = device->interesting_channels.size();
 405 }
 406
 407 // TODO: Can be SSSE3-optimized if need be.
 408 void AudioMixer::fill_audio_bus(const map<DeviceSpec, vector<float>> &samples_card, const InputMapping::Bus &bus, unsigned num_samples, float *output)
 409 {
 410         if (bus.device.type == InputSourceType::SILENCE) {
 411                 memset(output, 0, num_samples * 2 * sizeof(*output));
 412         } else {
 413                 assert(bus.device.type == InputSourceType::CAPTURE_CARD ||
 414                        bus.device.type == InputSourceType::ALSA_INPUT);
 415                 const float *lsrc, *rsrc;
 416                 unsigned lstride, rstride;
 417                 float *dptr = output;
 418                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[0], &lsrc, &lstride);
 419                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[1], &rsrc, &rstride);
 420                 for (unsigned i = 0; i < num_samples; ++i) {
 421                         *dptr++ = *lsrc;
 422                         *dptr++ = *rsrc;
 423                         lsrc += lstride;
 424                         rsrc += rstride;
 425                 }
 426         }
 427 }
 428
 429 vector<DeviceSpec> AudioMixer::get_active_devices() const
 430 {
 431         vector<DeviceSpec> ret;
 432         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 433                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 434                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 435                         ret.push_back(device_spec);
 436                 }
 437         }
 438         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 439                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 440                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 441                         ret.push_back(device_spec);
 442                 }
 443         }
 444         return ret;
 445 }
 446
 447 namespace {
 448
 449 void apply_gain(float db, float last_db, vector<float> *samples)
 450 {
 451         if (fabs(db - last_db) < 1e-3) {
 452                 // Constant over this frame.
 453                 const float gain = from_db(db);
 454                 for (size_t i = 0; i < samples->size(); ++i) {
 455                         (*samples)[i] *= gain;
 456                 }
 457         } else {
 458                 // We need to do a fade.
 459                 unsigned num_samples = samples->size() / 2;
 460                 float gain = from_db(last_db);
 461                 const float gain_inc = pow(from_db(db - last_db), 1.0 / num_samples);
 462                 for (size_t i = 0; i < num_samples; ++i) {
 463                         (*samples)[i * 2 + 0] *= gain;
 464                         (*samples)[i * 2 + 1] *= gain;
 465                         gain *= gain_inc;
 466                 }
 467         }
 468 }
 469
 470 }  // namespace
 471
 472 vector<float> AudioMixer::get_output(steady_clock::time_point ts, unsigned num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
 473 {
 474         map<DeviceSpec, vector<float>> samples_card;
 475         vector<float> samples_bus;
 476
 477         lock_guard<timed_mutex> lock(audio_mutex);
 478
 479         // Pick out all the interesting channels from all the cards.
 480         for (const DeviceSpec &device_spec : get_active_devices()) {
 481                 AudioDevice *device = find_audio_device(device_spec);
 482                 samples_card[device_spec].resize(num_samples * device->interesting_channels.size());
 483                 if (device->silenced) {
 484                         memset(&samples_card[device_spec][0], 0, samples_card[device_spec].size() * sizeof(float));
 485                 } else {
 486                         device->resampling_queue->get_output_samples(
 487                                 ts,
 488                                 &samples_card[device_spec][0],
 489                                 num_samples,
 490                                 rate_adjustment_policy);
 491                 }
 492         }
 493
 494         vector<float> samples_out, left, right;
 495         samples_out.resize(num_samples * 2);
 496         samples_bus.resize(num_samples * 2);
 497         for (unsigned bus_index = 0; bus_index < input_mapping.buses.size(); ++bus_index) {
 498                 fill_audio_bus(samples_card, input_mapping.buses[bus_index], num_samples, &samples_bus[0]);
 499                 apply_eq(bus_index, &samples_bus);
 500
 501                 {
 502                         lock_guard<mutex> lock(compressor_mutex);
 503
 504                         // Apply a level compressor to get the general level right.
 505                         // Basically, if it's over about -40 dBFS, we squeeze it down to that level
 506                         // (or more precisely, near it, since we don't use infinite ratio),
 507                         // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
 508                         // entirely arbitrary, but from practical tests with speech, it seems to
 509                         // put ut around -23 LUFS, so it's a reasonable starting point for later use.
 510                         if (level_compressor_enabled[bus_index]) {
 511                                 float threshold = 0.01f;   // -40 dBFS.
 512                                 float ratio = 20.0f;
 513                                 float attack_time = 0.5f;
 514                                 float release_time = 20.0f;
 515                                 float makeup_gain = from_db(ref_level_dbfs - (-40.0f));  // +26 dB.
 516                                 level_compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 517                                 gain_staging_db[bus_index] = to_db(level_compressor[bus_index]->get_attenuation() * makeup_gain);
 518                         } else {
 519                                 // Just apply the gain we already had.
 520                                 float db = gain_staging_db[bus_index];
 521                                 float last_db = last_gain_staging_db[bus_index];
 522                                 apply_gain(db, last_db, &samples_bus);
 523                         }
 524                         last_gain_staging_db[bus_index] = gain_staging_db[bus_index];
 525
 526 #if 0
 527                         printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
 528                                 level_compressor.get_level(), to_db(level_compressor.get_level()),
 529                                 level_compressor.get_attenuation(), to_db(level_compressor.get_attenuation()),
 530                                 to_db(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
 531 #endif
 532
 533                         // The real compressor.
 534                         if (compressor_enabled[bus_index]) {
 535                                 float threshold = from_db(compressor_threshold_dbfs[bus_index]);
 536                                 float ratio = 20.0f;
 537                                 float attack_time = 0.005f;
 538                                 float release_time = 0.040f;
 539                                 float makeup_gain = 2.0f;  // +6 dB.
 540                                 compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 541                 //              compressor_att = compressor.get_attenuation();
 542                         }
 543                 }
 544
 545                 add_bus_to_master(bus_index, samples_bus, &samples_out);
 546                 deinterleave_samples(samples_bus, &left, &right);
 547                 measure_bus_levels(bus_index, left, right);
 548         }
 549
 550         {
 551                 lock_guard<mutex> lock(compressor_mutex);
 552
 553                 // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
 554                 // Note that since ratio is not infinite, we could go slightly higher than this.
 555                 if (limiter_enabled) {
 556                         float threshold = from_db(limiter_threshold_dbfs);
 557                         float ratio = 30.0f;
 558                         float attack_time = 0.0f;  // Instant.
 559                         float release_time = 0.020f;
 560                         float makeup_gain = 1.0f;  // 0 dB.
 561                         limiter.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 562         //              limiter_att = limiter.get_attenuation();
 563                 }
 564
 565         //      printf("limiter=%+5.1f  compressor=%+5.1f\n", to_db(limiter_att), to_db(compressor_att));
 566         }
 567
 568         // At this point, we are most likely close to +0 LU (at least if the
 569         // faders sum to 0 dB and the compressors are on), but all of our
 570         // measurements have been on raw sample values, not R128 values.
 571         // So we have a final makeup gain to get us to +0 LU; the gain
 572         // adjustments required should be relatively small, and also, the
 573         // offset shouldn't change much (only if the type of audio changes
 574         // significantly). Thus, we shoot for updating this value basically
 575         // “whenever we process buffers”, since the R128 calculation isn't exactly
 576         // something we get out per-sample.
 577         //
 578         // Note that there's a feedback loop here, so we choose a very slow filter
 579         // (half-time of 30 seconds).
 580         double target_loudness_factor, alpha;
 581         double loudness_lu = r128.loudness_M() - ref_level_lufs;
 582         target_loudness_factor = final_makeup_gain * from_db(-loudness_lu);
 583
 584         // If we're outside +/- 5 LU (after correction), we don't count it as
 585         // a normal signal (probably silence) and don't change the
 586         // correction factor; just apply what we already have.
 587         if (fabs(loudness_lu) >= 5.0 || !final_makeup_gain_auto) {
 588                 alpha = 0.0;
 589         } else {
 590                 // Formula adapted from
 591                 // https://en.wikipedia.org/wiki/Low-pass_filter#Simple_infinite_impulse_response_filter.
 592                 const double half_time_s = 30.0;
 593                 const double fc_mul_2pi_delta_t = 1.0 / (half_time_s * OUTPUT_FREQUENCY);
 594                 alpha = fc_mul_2pi_delta_t / (fc_mul_2pi_delta_t + 1.0);
 595         }
 596
 597         {
 598                 lock_guard<mutex> lock(compressor_mutex);
 599                 double m = final_makeup_gain;
 600                 for (size_t i = 0; i < samples_out.size(); i += 2) {
 601                         samples_out[i + 0] *= m;
 602                         samples_out[i + 1] *= m;
 603                         m += (target_loudness_factor - m) * alpha;
 604                 }
 605                 final_makeup_gain = m;
 606         }
 607
 608         update_meters(samples_out);
 609
 610         return samples_out;
 611 }
 612
 613 namespace {
 614
 615 void apply_filter_fade(StereoFilter *filter, float *data, unsigned num_samples, float cutoff_hz, float db, float last_db)
 616 {
 617         // A granularity of 32 samples is an okay tradeoff between speed and
 618         // smoothness; recalculating the filters is pretty expensive, so it's
 619         // good that we don't do this all the time.
 620         static constexpr unsigned filter_granularity_samples = 32;
 621
 622         const float cutoff_linear = cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY;
 623         if (fabs(db - last_db) < 1e-3) {
 624                 // Constant over this frame.
 625                 if (fabs(db) > 0.01f) {
 626                         filter->render(data, num_samples, cutoff_linear, 0.5f, db / 40.0f);
 627                 }
 628         } else {
 629                 // We need to do a fade. (Rounding up avoids division by zero.)
 630                 unsigned num_blocks = (num_samples + filter_granularity_samples - 1) / filter_granularity_samples;
 631                 const float inc_db_norm = (db - last_db) / 40.0f / num_blocks;
 632                 float db_norm = db / 40.0f;
 633                 for (size_t i = 0; i < num_samples; i += filter_granularity_samples) {
 634                         size_t samples_this_block = std::min<size_t>(num_samples - i, filter_granularity_samples);
 635                         filter->render(data + i * 2, samples_this_block, cutoff_linear, 0.5f, db_norm);
 636                         db_norm += inc_db_norm;
 637                 }
 638         }
 639 }
 640
 641 }  // namespace
 642
 643 void AudioMixer::apply_eq(unsigned bus_index, vector<float> *samples_bus)
 644 {
 645         constexpr float bass_freq_hz = 200.0f;
 646         constexpr float treble_freq_hz = 4700.0f;
 647
 648         // Cut away everything under 120 Hz (or whatever the cutoff is);
 649         // we don't need it for voice, and it will reduce headroom
 650         // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
 651         // should be dampened.)
 652         if (locut_enabled[bus_index]) {
 653                 locut[bus_index].render(samples_bus->data(), samples_bus->size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
 654         }
 655
 656         // Apply the rest of the EQ. Since we only have a simple three-band EQ,
 657         // we can implement it with two shelf filters. We use a simple gain to
 658         // set the mid-level filter, and then offset the low and high bands
 659         // from that if we need to. (We could perhaps have folded the gain into
 660         // the next part, but it's so cheap that the trouble isn't worth it.)
 661         //
 662         // If any part of the EQ has changed appreciably since last frame,
 663         // we fade smoothly during the course of this frame.
 664         const float bass_db = eq_level_db[bus_index][EQ_BAND_BASS];
 665         const float mid_db = eq_level_db[bus_index][EQ_BAND_MID];
 666         const float treble_db = eq_level_db[bus_index][EQ_BAND_TREBLE];
 667
 668         const float last_bass_db = last_eq_level_db[bus_index][EQ_BAND_BASS];
 669         const float last_mid_db = last_eq_level_db[bus_index][EQ_BAND_MID];
 670         const float last_treble_db = last_eq_level_db[bus_index][EQ_BAND_TREBLE];
 671
 672         assert(samples_bus->size() % 2 == 0);
 673         const unsigned num_samples = samples_bus->size() / 2;
 674
 675         apply_gain(mid_db, last_mid_db, samples_bus);
 676
 677         apply_filter_fade(&eq[bus_index][EQ_BAND_BASS], samples_bus->data(), num_samples, bass_freq_hz, bass_db - mid_db, last_bass_db - last_mid_db);
 678         apply_filter_fade(&eq[bus_index][EQ_BAND_TREBLE], samples_bus->data(), num_samples, treble_freq_hz, treble_db - mid_db, last_treble_db - last_mid_db);
 679
 680         last_eq_level_db[bus_index][EQ_BAND_BASS] = bass_db;
 681         last_eq_level_db[bus_index][EQ_BAND_MID] = mid_db;
 682         last_eq_level_db[bus_index][EQ_BAND_TREBLE] = treble_db;
 683 }
 684
 685 void AudioMixer::add_bus_to_master(unsigned bus_index, const vector<float> &samples_bus, vector<float> *samples_out)
 686 {
 687         assert(samples_bus.size() == samples_out->size());
 688         assert(samples_bus.size() % 2 == 0);
 689         unsigned num_samples = samples_bus.size() / 2;
 690         const float new_volume_db = mute[bus_index] ? -90.0f : fader_volume_db[bus_index].load();
 691         if (fabs(new_volume_db - last_fader_volume_db[bus_index]) > 1e-3) {
 692                 // The volume has changed; do a fade over the course of this frame.
 693                 // (We might have some numerical issues here, but it seems to sound OK.)
 694                 // For the purpose of fading here, the silence floor is set to -90 dB
 695                 // (the fader only goes to -84).
 696                 float old_volume = from_db(max<float>(last_fader_volume_db[bus_index], -90.0f));
 697                 float volume = from_db(max<float>(new_volume_db, -90.0f));
 698
 699                 float volume_inc = pow(volume / old_volume, 1.0 / num_samples);
 700                 volume = old_volume;
 701                 if (bus_index == 0) {
 702                         for (unsigned i = 0; i < num_samples; ++i) {
 703                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 704                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 705                                 volume *= volume_inc;
 706                         }
 707                 } else {
 708                         for (unsigned i = 0; i < num_samples; ++i) {
 709                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 710                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 711                                 volume *= volume_inc;
 712                         }
 713                 }
 714         } else if (new_volume_db > -90.0f) {
 715                 float volume = from_db(new_volume_db);
 716                 if (bus_index == 0) {
 717                         for (unsigned i = 0; i < num_samples; ++i) {
 718                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 719                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 720                         }
 721                 } else {
 722                         for (unsigned i = 0; i < num_samples; ++i) {
 723                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 724                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 725                         }
 726                 }
 727         }
 728
 729         last_fader_volume_db[bus_index] = new_volume_db;
 730 }
 731
 732 void AudioMixer::measure_bus_levels(unsigned bus_index, const vector<float> &left, const vector<float> &right)
 733 {
 734         assert(left.size() == right.size());
 735         const float volume = mute[bus_index] ? 0.0f : from_db(fader_volume_db[bus_index]);
 736         const float peak_levels[2] = {
 737                 find_peak(left.data(), left.size()) * volume,
 738                 find_peak(right.data(), right.size()) * volume
 739         };
 740         for (unsigned channel = 0; channel < 2; ++channel) {
 741                 // Compute the current value, including hold and falloff.
 742                 // The constants are borrowed from zita-mu1 by Fons Adriaensen.
 743                 static constexpr float hold_sec = 0.5f;
 744                 static constexpr float falloff_db_sec = 15.0f;  // dB/sec falloff after hold.
 745                 float current_peak;
 746                 PeakHistory &history = peak_history[bus_index][channel];
 747                 history.historic_peak = max(history.historic_peak, peak_levels[channel]);
 748                 if (history.age_seconds < hold_sec) {
 749                         current_peak = history.last_peak;
 750                 } else {
 751                         current_peak = history.last_peak * from_db(-falloff_db_sec * (history.age_seconds - hold_sec));
 752                 }
 753
 754                 // See if we have a new peak to replace the old (possibly falling) one.
 755                 if (peak_levels[channel] > current_peak) {
 756                         history.last_peak = peak_levels[channel];
 757                         history.age_seconds = 0.0f;  // Not 100% correct, but more than good enough given our frame sizes.
 758                         current_peak = peak_levels[channel];
 759                 } else {
 760                         history.age_seconds += float(left.size()) / OUTPUT_FREQUENCY;
 761                 }
 762                 history.current_level = peak_levels[channel];
 763                 history.current_peak = current_peak;
 764         }
 765 }
 766
 767 void AudioMixer::update_meters(const vector<float> &samples)
 768 {
 769         // Upsample 4x to find interpolated peak.
 770         peak_resampler.inp_data = const_cast<float *>(samples.data());
 771         peak_resampler.inp_count = samples.size() / 2;
 772
 773         vector<float> interpolated_samples;
 774         interpolated_samples.resize(samples.size());
 775         {
 776                 lock_guard<mutex> lock(audio_measure_mutex);
 777
 778                 while (peak_resampler.inp_count > 0) {  // About four iterations.
 779                         peak_resampler.out_data = &interpolated_samples[0];
 780                         peak_resampler.out_count = interpolated_samples.size() / 2;
 781                         peak_resampler.process();
 782                         size_t out_stereo_samples = interpolated_samples.size() / 2 - peak_resampler.out_count;
 783                         peak = max<float>(peak, find_peak(interpolated_samples.data(), out_stereo_samples * 2));
 784                         peak_resampler.out_data = nullptr;
 785                 }
 786         }
 787
 788         // Find R128 levels and L/R correlation.
 789         vector<float> left, right;
 790         deinterleave_samples(samples, &left, &right);
 791         float *ptrs[] = { left.data(), right.data() };
 792         {
 793                 lock_guard<mutex> lock(audio_measure_mutex);
 794                 r128.process(left.size(), ptrs);
 795                 correlation.process_samples(samples);
 796         }
 797
 798         send_audio_level_callback();
 799 }
 800
 801 void AudioMixer::reset_meters()
 802 {
 803         lock_guard<mutex> lock(audio_measure_mutex);
 804         peak_resampler.reset();
 805         peak = 0.0f;
 806         r128.reset();
 807         r128.integr_start();
 808         correlation.reset();
 809 }
 810
 811 void AudioMixer::send_audio_level_callback()
 812 {
 813         if (audio_level_callback == nullptr) {
 814                 return;
 815         }
 816
 817         lock_guard<mutex> lock(audio_measure_mutex);
 818         double loudness_s = r128.loudness_S();
 819         double loudness_i = r128.integrated();
 820         double loudness_range_low = r128.range_min();
 821         double loudness_range_high = r128.range_max();
 822
 823         vector<BusLevel> bus_levels;
 824         bus_levels.resize(input_mapping.buses.size());
 825         {
 826                 lock_guard<mutex> lock(compressor_mutex);
 827                 for (unsigned bus_index = 0; bus_index < bus_levels.size(); ++bus_index) {
 828                         bus_levels[bus_index].current_level_dbfs[0] = to_db(peak_history[bus_index][0].current_level);
 829                         bus_levels[bus_index].current_level_dbfs[1] = to_db(peak_history[bus_index][1].current_level);
 830                         bus_levels[bus_index].peak_level_dbfs[0] = to_db(peak_history[bus_index][0].current_peak);
 831                         bus_levels[bus_index].peak_level_dbfs[1] = to_db(peak_history[bus_index][1].current_peak);
 832                         bus_levels[bus_index].historic_peak_dbfs = to_db(
 833                                 max(peak_history[bus_index][0].historic_peak,
 834                                     peak_history[bus_index][1].historic_peak));
 835                         bus_levels[bus_index].gain_staging_db = gain_staging_db[bus_index];
 836                         if (compressor_enabled[bus_index]) {
 837                                 bus_levels[bus_index].compressor_attenuation_db = -to_db(compressor[bus_index]->get_attenuation());
 838                         } else {
 839                                 bus_levels[bus_index].compressor_attenuation_db = 0.0;
 840                         }
 841                 }
 842         }
 843
 844         audio_level_callback(loudness_s, to_db(peak), bus_levels,
 845                 loudness_i, loudness_range_low, loudness_range_high,
 846                 to_db(final_makeup_gain),
 847                 correlation.get_correlation());
 848 }
 849
 850 map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 851 {
 852         lock_guard<timed_mutex> lock(audio_mutex);
 853
 854         map<DeviceSpec, DeviceInfo> devices;
 855         for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
 856                 const DeviceSpec spec{ InputSourceType::CAPTURE_CARD, card_index };
 857                 const AudioDevice *device = &video_cards[card_index];
 858                 DeviceInfo info;
 859                 info.display_name = device->display_name;
 860                 info.num_channels = 8;
 861                 devices.insert(make_pair(spec, info));
 862         }
 863         vector<ALSAPool::Device> available_alsa_devices = alsa_pool.get_devices();
 864         for (unsigned card_index = 0; card_index < available_alsa_devices.size(); ++card_index) {
 865                 const DeviceSpec spec{ InputSourceType::ALSA_INPUT, card_index };
 866                 const ALSAPool::Device &device = available_alsa_devices[card_index];
 867                 DeviceInfo info;
 868                 info.display_name = device.display_name();
 869                 info.num_channels = device.num_channels;
 870                 info.alsa_name = device.name;
 871                 info.alsa_info = device.info;
 872                 info.alsa_address = device.address;
 873                 devices.insert(make_pair(spec, info));
 874         }
 875         return devices;
 876 }
 877
 878 void AudioMixer::set_display_name(DeviceSpec device_spec, const string &name)
 879 {
 880         AudioDevice *device = find_audio_device(device_spec);
 881
 882         lock_guard<timed_mutex> lock(audio_mutex);
 883         device->display_name = name;
 884 }
 885
 886 void AudioMixer::serialize_device(DeviceSpec device_spec, DeviceSpecProto *device_spec_proto)
 887 {
 888         lock_guard<timed_mutex> lock(audio_mutex);
 889         switch (device_spec.type) {
 890                 case InputSourceType::SILENCE:
 891                         device_spec_proto->set_type(DeviceSpecProto::SILENCE);
 892                         break;
 893                 case InputSourceType::CAPTURE_CARD:
 894                         device_spec_proto->set_type(DeviceSpecProto::CAPTURE_CARD);
 895                         device_spec_proto->set_index(device_spec.index);
 896                         device_spec_proto->set_display_name(video_cards[device_spec.index].display_name);
 897                         break;
 898                 case InputSourceType::ALSA_INPUT:
 899                         alsa_pool.serialize_device(device_spec.index, device_spec_proto);
 900                         break;
 901         }
 902 }
 903
 904 void AudioMixer::set_simple_input(unsigned card_index)
 905 {
 906         InputMapping new_input_mapping;
 907         InputMapping::Bus input;
 908         input.name = "Main";
 909         input.device.type = InputSourceType::CAPTURE_CARD;
 910         input.device.index = card_index;
 911         input.source_channel[0] = 0;
 912         input.source_channel[1] = 1;
 913
 914         new_input_mapping.buses.push_back(input);
 915
 916         lock_guard<timed_mutex> lock(audio_mutex);
 917         current_mapping_mode = MappingMode::SIMPLE;
 918         set_input_mapping_lock_held(new_input_mapping);
 919         fader_volume_db[0] = 0.0f;
 920 }
 921
 922 unsigned AudioMixer::get_simple_input() const
 923 {
 924         lock_guard<timed_mutex> lock(audio_mutex);
 925         if (input_mapping.buses.size() == 1 &&
 926             input_mapping.buses[0].device.type == InputSourceType::CAPTURE_CARD &&
 927             input_mapping.buses[0].source_channel[0] == 0 &&
 928             input_mapping.buses[0].source_channel[1] == 1) {
 929                 return input_mapping.buses[0].device.index;
 930         } else {
 931                 return numeric_limits<unsigned>::max();
 932         }
 933 }
 934
 935 void AudioMixer::set_input_mapping(const InputMapping &new_input_mapping)
 936 {
 937         lock_guard<timed_mutex> lock(audio_mutex);
 938         set_input_mapping_lock_held(new_input_mapping);
 939         current_mapping_mode = MappingMode::MULTICHANNEL;
 940 }
 941
 942 AudioMixer::MappingMode AudioMixer::get_mapping_mode() const
 943 {
 944         lock_guard<timed_mutex> lock(audio_mutex);
 945         return current_mapping_mode;
 946 }
 947
 948 void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mapping)
 949 {
 950         map<DeviceSpec, set<unsigned>> interesting_channels;
 951         for (const InputMapping::Bus &bus : new_input_mapping.buses) {
 952                 if (bus.device.type == InputSourceType::CAPTURE_CARD ||
 953                     bus.device.type == InputSourceType::ALSA_INPUT) {
 954                         for (unsigned channel = 0; channel < 2; ++channel) {
 955                                 if (bus.source_channel[channel] != -1) {
 956                                         interesting_channels[bus.device].insert(bus.source_channel[channel]);
 957                                 }
 958                         }
 959                 }
 960         }
 961
 962         // Reset resamplers for all cards that don't have the exact same state as before.
 963         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 964                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 965                 AudioDevice *device = find_audio_device(device_spec);
 966                 if (device->interesting_channels != interesting_channels[device_spec]) {
 967                         device->interesting_channels = interesting_channels[device_spec];
 968                         reset_resampler_mutex_held(device_spec);
 969                 }
 970         }
 971         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 972                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 973                 AudioDevice *device = find_audio_device(device_spec);
 974                 if (interesting_channels[device_spec].empty()) {
 975                         alsa_pool.release_device(card_index);
 976                 } else {
 977                         alsa_pool.hold_device(card_index);
 978                 }
 979                 if (device->interesting_channels != interesting_channels[device_spec]) {
 980                         device->interesting_channels = interesting_channels[device_spec];
 981                         alsa_pool.reset_device(device_spec.index);
 982                         reset_resampler_mutex_held(device_spec);
 983                 }
 984         }
 985
 986         input_mapping = new_input_mapping;
 987 }
 988
 989 InputMapping AudioMixer::get_input_mapping() const
 990 {
 991         lock_guard<timed_mutex> lock(audio_mutex);
 992         return input_mapping;
 993 }
 994
 995 unsigned AudioMixer::num_buses() const
 996 {
 997         lock_guard<timed_mutex> lock(audio_mutex);
 998         return input_mapping.buses.size();
 999 }
1000
1001 void AudioMixer::reset_peak(unsigned bus_index)
1002 {
1003         lock_guard<timed_mutex> lock(audio_mutex);
1004         for (unsigned channel = 0; channel < 2; ++channel) {
1005                 PeakHistory &history = peak_history[bus_index][channel];
1006                 history.current_level = 0.0f;
1007                 history.historic_peak = 0.0f;
1008                 history.current_peak = 0.0f;
1009                 history.last_peak = 0.0f;
1010                 history.age_seconds = 0.0f;
1011         }
1012 }
1013
1014 AudioMixer *global_audio_mixer = nullptr;