git.sesse.net Git - nageru/blob - audio_mixer.cpp

   1 #include "audio_mixer.h"
   2
   3 #include <assert.h>
   4 #include <bmusb/bmusb.h>
   5 #include <endian.h>
   6 #include <math.h>
   7 #ifdef __SSE2__
   8 #include <immintrin.h>
   9 #endif
  10 #include <stdbool.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <algorithm>
  15 #include <chrono>
  16 #include <cmath>
  17 #include <cstddef>
  18 #include <limits>
  19 #include <utility>
  20
  21 #include "db.h"
  22 #include "flags.h"
  23 #include "state.pb.h"
  24 #include "timebase.h"
  25
  26 using namespace bmusb;
  27 using namespace std;
  28 using namespace std::chrono;
  29 using namespace std::placeholders;
  30
  31 namespace {
  32
  33 // TODO: If these prove to be a bottleneck, they can be SSSE3-optimized
  34 // (usually including multiple channels at a time).
  35
  36 void convert_fixed16_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  37                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  38                              size_t num_samples)
  39 {
  40         assert(in_channel < in_num_channels);
  41         assert(out_channel < out_num_channels);
  42         src += in_channel * 2;
  43         dst += out_channel;
  44
  45         for (size_t i = 0; i < num_samples; ++i) {
  46                 int16_t s = le16toh(*(int16_t *)src);
  47                 *dst = s * (1.0f / 32768.0f);
  48
  49                 src += 2 * in_num_channels;
  50                 dst += out_num_channels;
  51         }
  52 }
  53
  54 void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  55                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  56                              size_t num_samples)
  57 {
  58         assert(in_channel < in_num_channels);
  59         assert(out_channel < out_num_channels);
  60         src += in_channel * 3;
  61         dst += out_channel;
  62
  63         for (size_t i = 0; i < num_samples; ++i) {
  64                 uint32_t s1 = src[0];
  65                 uint32_t s2 = src[1];
  66                 uint32_t s3 = src[2];
  67                 uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
  68                 *dst = int(s) * (1.0f / 2147483648.0f);
  69
  70                 src += 3 * in_num_channels;
  71                 dst += out_num_channels;
  72         }
  73 }
  74
  75 void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  76                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  77                              size_t num_samples)
  78 {
  79         assert(in_channel < in_num_channels);
  80         assert(out_channel < out_num_channels);
  81         src += in_channel * 4;
  82         dst += out_channel;
  83
  84         for (size_t i = 0; i < num_samples; ++i) {
  85                 int32_t s = le32toh(*(int32_t *)src);
  86                 *dst = s * (1.0f / 2147483648.0f);
  87
  88                 src += 4 * in_num_channels;
  89                 dst += out_num_channels;
  90         }
  91 }
  92
  93 float find_peak_plain(const float *samples, size_t num_samples) __attribute__((unused));
  94
  95 float find_peak_plain(const float *samples, size_t num_samples)
  96 {
  97         float m = fabs(samples[0]);
  98         for (size_t i = 1; i < num_samples; ++i) {
  99                 m = max(m, fabs(samples[i]));
 100         }
 101         return m;
 102 }
 103
 104 #ifdef __SSE__
 105 static inline float horizontal_max(__m128 m)
 106 {
 107         __m128 tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 0, 3, 2));
 108         m = _mm_max_ps(m, tmp);
 109         tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 3, 0, 1));
 110         m = _mm_max_ps(m, tmp);
 111         return _mm_cvtss_f32(m);
 112 }
 113
 114 float find_peak(const float *samples, size_t num_samples)
 115 {
 116         const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu));
 117         __m128 m = _mm_setzero_ps();
 118         for (size_t i = 0; i < (num_samples & ~3); i += 4) {
 119                 __m128 x = _mm_loadu_ps(samples + i);
 120                 x = _mm_and_ps(x, abs_mask);
 121                 m = _mm_max_ps(m, x);
 122         }
 123         float result = horizontal_max(m);
 124
 125         for (size_t i = (num_samples & ~3); i < num_samples; ++i) {
 126                 result = max(result, fabs(samples[i]));
 127         }
 128
 129 #if 0
 130         // Self-test. We should be bit-exact the same.
 131         float reference_result = find_peak_plain(samples, num_samples);
 132         if (result != reference_result) {
 133                 fprintf(stderr, "Error: Peak is %f [%f %f %f %f]; should be %f.\n",
 134                         result,
 135                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(0, 0, 0, 0))),
 136                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))),
 137                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))),
 138                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))),
 139                         reference_result);
 140                 abort();
 141         }
 142 #endif
 143         return result;
 144 }
 145 #else
 146 float find_peak(const float *samples, size_t num_samples)
 147 {
 148         return find_peak_plain(samples, num_samples);
 149 }
 150 #endif
 151
 152 void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<float> *out_r)
 153 {
 154         size_t num_samples = in.size() / 2;
 155         out_l->resize(num_samples);
 156         out_r->resize(num_samples);
 157
 158         const float *inptr = in.data();
 159         float *lptr = &(*out_l)[0];
 160         float *rptr = &(*out_r)[0];
 161         for (size_t i = 0; i < num_samples; ++i) {
 162                 *lptr++ = *inptr++;
 163                 *rptr++ = *inptr++;
 164         }
 165 }
 166
 167 }  // namespace
 168
 169 AudioMixer::AudioMixer(unsigned num_cards)
 170         : num_cards(num_cards),
 171           limiter(OUTPUT_FREQUENCY),
 172           correlation(OUTPUT_FREQUENCY)
 173 {
 174         for (unsigned bus_index = 0; bus_index < MAX_BUSES; ++bus_index) {
 175                 locut[bus_index].init(FILTER_HPF, 2);
 176                 eq[bus_index][EQ_BAND_BASS].init(FILTER_LOW_SHELF, 1);
 177                 // Note: EQ_BAND_MID isn't used (see comments in apply_eq()).
 178                 eq[bus_index][EQ_BAND_TREBLE].init(FILTER_HIGH_SHELF, 1);
 179                 compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
 180                 level_compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
 181
 182                 set_bus_settings(bus_index, get_default_bus_settings());
 183         }
 184         set_limiter_enabled(global_flags.limiter_enabled);
 185         set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
 186
 187         r128.init(2, OUTPUT_FREQUENCY);
 188         r128.integr_start();
 189
 190         // hlen=16 is pretty low quality, but we use quite a bit of CPU otherwise,
 191         // and there's a limit to how important the peak meter is.
 192         peak_resampler.setup(OUTPUT_FREQUENCY, OUTPUT_FREQUENCY * 4, /*num_channels=*/2, /*hlen=*/16, /*frel=*/1.0);
 193
 194         global_audio_mixer = this;
 195         alsa_pool.init();
 196
 197         if (!global_flags.input_mapping_filename.empty()) {
 198                 // Must happen after ALSAPool is initialized, as it needs to know the card list.
 199                 current_mapping_mode = MappingMode::MULTICHANNEL;
 200                 InputMapping new_input_mapping;
 201                 if (!load_input_mapping_from_file(get_devices(),
 202                                                   global_flags.input_mapping_filename,
 203                                                   &new_input_mapping)) {
 204                         fprintf(stderr, "Failed to load input mapping from '%s', exiting.\n",
 205                                 global_flags.input_mapping_filename.c_str());
 206                         exit(1);
 207                 }
 208                 set_input_mapping(new_input_mapping);
 209         } else {
 210                 set_simple_input(/*card_index=*/0);
 211                 if (global_flags.multichannel_mapping_mode) {
 212                         current_mapping_mode = MappingMode::MULTICHANNEL;
 213                 }
 214         }
 215 }
 216
 217 void AudioMixer::reset_resampler(DeviceSpec device_spec)
 218 {
 219         lock_guard<timed_mutex> lock(audio_mutex);
 220         reset_resampler_mutex_held(device_spec);
 221 }
 222
 223 void AudioMixer::reset_resampler_mutex_held(DeviceSpec device_spec)
 224 {
 225         AudioDevice *device = find_audio_device(device_spec);
 226
 227         if (device->interesting_channels.empty()) {
 228                 device->resampling_queue.reset();
 229         } else {
 230                 // TODO: ResamplingQueue should probably take the full device spec.
 231                 // (It's only used for console output, though.)
 232                 device->resampling_queue.reset(new ResamplingQueue(
 233                         device_spec.index, device->capture_frequency, OUTPUT_FREQUENCY, device->interesting_channels.size(),
 234                         global_flags.audio_queue_length_ms * 0.001));
 235         }
 236 }
 237
 238 bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, int64_t frame_length, steady_clock::time_point frame_time)
 239 {
 240         AudioDevice *device = find_audio_device(device_spec);
 241
 242         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 243         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 244                 return false;
 245         }
 246         if (device->resampling_queue == nullptr) {
 247                 // No buses use this device; throw it away.
 248                 return true;
 249         }
 250
 251         unsigned num_channels = device->interesting_channels.size();
 252         assert(num_channels > 0);
 253
 254         // Convert the audio to fp32.
 255         unique_ptr<float[]> audio(new float[num_samples * num_channels]);
 256         unsigned channel_index = 0;
 257         for (auto channel_it = device->interesting_channels.cbegin(); channel_it != device->interesting_channels.end(); ++channel_it, ++channel_index) {
 258                 switch (audio_format.bits_per_sample) {
 259                 case 0:
 260                         assert(num_samples == 0);
 261                         break;
 262                 case 16:
 263                         convert_fixed16_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 264                         break;
 265                 case 24:
 266                         convert_fixed24_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 267                         break;
 268                 case 32:
 269                         convert_fixed32_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 270                         break;
 271                 default:
 272                         fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
 273                         assert(false);
 274                 }
 275         }
 276
 277         // If we changed frequency since last frame, we'll need to reset the resampler.
 278         if (audio_format.sample_rate != device->capture_frequency) {
 279                 device->capture_frequency = audio_format.sample_rate;
 280                 reset_resampler_mutex_held(device_spec);
 281         }
 282
 283         // Now add it.
 284         device->resampling_queue->add_input_samples(frame_time, audio.get(), num_samples, ResamplingQueue::ADJUST_RATE);
 285         return true;
 286 }
 287
 288 bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length)
 289 {
 290         AudioDevice *device = find_audio_device(device_spec);
 291
 292         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 293         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 294                 return false;
 295         }
 296         if (device->resampling_queue == nullptr) {
 297                 // No buses use this device; throw it away.
 298                 return true;
 299         }
 300
 301         unsigned num_channels = device->interesting_channels.size();
 302         assert(num_channels > 0);
 303
 304         vector<float> silence(samples_per_frame * num_channels, 0.0f);
 305         for (unsigned i = 0; i < num_frames; ++i) {
 306                 device->resampling_queue->add_input_samples(steady_clock::now(), silence.data(), samples_per_frame, ResamplingQueue::DO_NOT_ADJUST_RATE);
 307         }
 308         return true;
 309 }
 310
 311 bool AudioMixer::silence_card(DeviceSpec device_spec, bool silence)
 312 {
 313         AudioDevice *device = find_audio_device(device_spec);
 314
 315         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 316         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 317                 return false;
 318         }
 319
 320         if (device->silenced && !silence) {
 321                 reset_resampler_mutex_held(device_spec);
 322         }
 323         device->silenced = silence;
 324         return true;
 325 }
 326
 327 AudioMixer::BusSettings AudioMixer::get_default_bus_settings()
 328 {
 329         BusSettings settings;
 330         settings.fader_volume_db = 0.0f;
 331         settings.muted = false;
 332         settings.locut_enabled = global_flags.locut_enabled;
 333         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 334                 settings.eq_level_db[band_index] = 0.0f;
 335         }
 336         settings.gain_staging_db = global_flags.initial_gain_staging_db;
 337         settings.level_compressor_enabled = global_flags.gain_staging_auto;
 338         settings.compressor_threshold_dbfs = ref_level_dbfs - 12.0f;  // -12 dB.
 339         settings.compressor_enabled = global_flags.compressor_enabled;
 340         return settings;
 341 }
 342
 343 AudioMixer::BusSettings AudioMixer::get_bus_settings(unsigned bus_index) const
 344 {
 345         lock_guard<timed_mutex> lock(audio_mutex);
 346         BusSettings settings;
 347         settings.fader_volume_db = fader_volume_db[bus_index];
 348         settings.muted = mute[bus_index];
 349         settings.locut_enabled = locut_enabled[bus_index];
 350         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 351                 settings.eq_level_db[band_index] = eq_level_db[bus_index][band_index];
 352         }
 353         settings.gain_staging_db = gain_staging_db[bus_index];
 354         settings.level_compressor_enabled = level_compressor_enabled[bus_index];
 355         settings.compressor_threshold_dbfs = compressor_threshold_dbfs[bus_index];
 356         settings.compressor_enabled = compressor_enabled[bus_index];
 357         return settings;
 358 }
 359
 360 void AudioMixer::set_bus_settings(unsigned bus_index, const AudioMixer::BusSettings &settings)
 361 {
 362         lock_guard<timed_mutex> lock(audio_mutex);
 363         fader_volume_db[bus_index] = settings.fader_volume_db;
 364         mute[bus_index] = settings.muted;
 365         locut_enabled[bus_index] = settings.locut_enabled;
 366         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 367                 eq_level_db[bus_index][band_index] = settings.eq_level_db[band_index];
 368         }
 369         gain_staging_db[bus_index] = settings.gain_staging_db;
 370         last_gain_staging_db[bus_index] = gain_staging_db[bus_index];
 371         level_compressor_enabled[bus_index] = settings.level_compressor_enabled;
 372         compressor_threshold_dbfs[bus_index] = settings.compressor_threshold_dbfs;
 373         compressor_enabled[bus_index] = settings.compressor_enabled;
 374 }
 375
 376 AudioMixer::AudioDevice *AudioMixer::find_audio_device(DeviceSpec device)
 377 {
 378         switch (device.type) {
 379         case InputSourceType::CAPTURE_CARD:
 380                 return &video_cards[device.index];
 381         case InputSourceType::ALSA_INPUT:
 382                 return &alsa_inputs[device.index];
 383         case InputSourceType::SILENCE:
 384         default:
 385                 assert(false);
 386         }
 387         return nullptr;
 388 }
 389
 390 // Get a pointer to the given channel from the given device.
 391 // The channel must be picked out earlier and resampled.
 392 void AudioMixer::find_sample_src_from_device(const map<DeviceSpec, vector<float>> &samples_card, DeviceSpec device_spec, int source_channel, const float **srcptr, unsigned *stride)
 393 {
 394         static float zero = 0.0f;
 395         if (source_channel == -1 || device_spec.type == InputSourceType::SILENCE) {
 396                 *srcptr = &zero;
 397                 *stride = 0;
 398                 return;
 399         }
 400         AudioDevice *device = find_audio_device(device_spec);
 401         assert(device->interesting_channels.count(source_channel) != 0);
 402         unsigned channel_index = 0;
 403         for (int channel : device->interesting_channels) {
 404                 if (channel == source_channel) break;
 405                 ++channel_index;
 406         }
 407         assert(channel_index < device->interesting_channels.size());
 408         const auto it = samples_card.find(device_spec);
 409         assert(it != samples_card.end());
 410         *srcptr = &(it->second)[channel_index];
 411         *stride = device->interesting_channels.size();
 412 }
 413
 414 // TODO: Can be SSSE3-optimized if need be.
 415 void AudioMixer::fill_audio_bus(const map<DeviceSpec, vector<float>> &samples_card, const InputMapping::Bus &bus, unsigned num_samples, float *output)
 416 {
 417         if (bus.device.type == InputSourceType::SILENCE) {
 418                 memset(output, 0, num_samples * 2 * sizeof(*output));
 419         } else {
 420                 assert(bus.device.type == InputSourceType::CAPTURE_CARD ||
 421                        bus.device.type == InputSourceType::ALSA_INPUT);
 422                 const float *lsrc, *rsrc;
 423                 unsigned lstride, rstride;
 424                 float *dptr = output;
 425                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[0], &lsrc, &lstride);
 426                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[1], &rsrc, &rstride);
 427                 for (unsigned i = 0; i < num_samples; ++i) {
 428                         *dptr++ = *lsrc;
 429                         *dptr++ = *rsrc;
 430                         lsrc += lstride;
 431                         rsrc += rstride;
 432                 }
 433         }
 434 }
 435
 436 vector<DeviceSpec> AudioMixer::get_active_devices() const
 437 {
 438         vector<DeviceSpec> ret;
 439         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 440                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 441                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 442                         ret.push_back(device_spec);
 443                 }
 444         }
 445         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 446                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 447                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 448                         ret.push_back(device_spec);
 449                 }
 450         }
 451         return ret;
 452 }
 453
 454 namespace {
 455
 456 void apply_gain(float db, float last_db, vector<float> *samples)
 457 {
 458         if (fabs(db - last_db) < 1e-3) {
 459                 // Constant over this frame.
 460                 const float gain = from_db(db);
 461                 for (size_t i = 0; i < samples->size(); ++i) {
 462                         (*samples)[i] *= gain;
 463                 }
 464         } else {
 465                 // We need to do a fade.
 466                 unsigned num_samples = samples->size() / 2;
 467                 float gain = from_db(last_db);
 468                 const float gain_inc = pow(from_db(db - last_db), 1.0 / num_samples);
 469                 for (size_t i = 0; i < num_samples; ++i) {
 470                         (*samples)[i * 2 + 0] *= gain;
 471                         (*samples)[i * 2 + 1] *= gain;
 472                         gain *= gain_inc;
 473                 }
 474         }
 475 }
 476
 477 }  // namespace
 478
 479 vector<float> AudioMixer::get_output(steady_clock::time_point ts, unsigned num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
 480 {
 481         map<DeviceSpec, vector<float>> samples_card;
 482         vector<float> samples_bus;
 483
 484         lock_guard<timed_mutex> lock(audio_mutex);
 485
 486         // Pick out all the interesting channels from all the cards.
 487         for (const DeviceSpec &device_spec : get_active_devices()) {
 488                 AudioDevice *device = find_audio_device(device_spec);
 489                 samples_card[device_spec].resize(num_samples * device->interesting_channels.size());
 490                 if (device->silenced) {
 491                         memset(&samples_card[device_spec][0], 0, samples_card[device_spec].size() * sizeof(float));
 492                 } else {
 493                         device->resampling_queue->get_output_samples(
 494                                 ts,
 495                                 &samples_card[device_spec][0],
 496                                 num_samples,
 497                                 rate_adjustment_policy);
 498                 }
 499         }
 500
 501         vector<float> samples_out, left, right;
 502         samples_out.resize(num_samples * 2);
 503         samples_bus.resize(num_samples * 2);
 504         for (unsigned bus_index = 0; bus_index < input_mapping.buses.size(); ++bus_index) {
 505                 fill_audio_bus(samples_card, input_mapping.buses[bus_index], num_samples, &samples_bus[0]);
 506                 apply_eq(bus_index, &samples_bus);
 507
 508                 {
 509                         lock_guard<mutex> lock(compressor_mutex);
 510
 511                         // Apply a level compressor to get the general level right.
 512                         // Basically, if it's over about -40 dBFS, we squeeze it down to that level
 513                         // (or more precisely, near it, since we don't use infinite ratio),
 514                         // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
 515                         // entirely arbitrary, but from practical tests with speech, it seems to
 516                         // put ut around -23 LUFS, so it's a reasonable starting point for later use.
 517                         if (level_compressor_enabled[bus_index]) {
 518                                 float threshold = 0.01f;   // -40 dBFS.
 519                                 float ratio = 20.0f;
 520                                 float attack_time = 0.5f;
 521                                 float release_time = 20.0f;
 522                                 float makeup_gain = from_db(ref_level_dbfs - (-40.0f));  // +26 dB.
 523                                 level_compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 524                                 gain_staging_db[bus_index] = to_db(level_compressor[bus_index]->get_attenuation() * makeup_gain);
 525                         } else {
 526                                 // Just apply the gain we already had.
 527                                 float db = gain_staging_db[bus_index];
 528                                 float last_db = last_gain_staging_db[bus_index];
 529                                 apply_gain(db, last_db, &samples_bus);
 530                         }
 531                         last_gain_staging_db[bus_index] = gain_staging_db[bus_index];
 532
 533 #if 0
 534                         printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
 535                                 level_compressor.get_level(), to_db(level_compressor.get_level()),
 536                                 level_compressor.get_attenuation(), to_db(level_compressor.get_attenuation()),
 537                                 to_db(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
 538 #endif
 539
 540                         // The real compressor.
 541                         if (compressor_enabled[bus_index]) {
 542                                 float threshold = from_db(compressor_threshold_dbfs[bus_index]);
 543                                 float ratio = 20.0f;
 544                                 float attack_time = 0.005f;
 545                                 float release_time = 0.040f;
 546                                 float makeup_gain = 2.0f;  // +6 dB.
 547                                 compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 548                 //              compressor_att = compressor.get_attenuation();
 549                         }
 550                 }
 551
 552                 add_bus_to_master(bus_index, samples_bus, &samples_out);
 553                 deinterleave_samples(samples_bus, &left, &right);
 554                 measure_bus_levels(bus_index, left, right);
 555         }
 556
 557         {
 558                 lock_guard<mutex> lock(compressor_mutex);
 559
 560                 // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
 561                 // Note that since ratio is not infinite, we could go slightly higher than this.
 562                 if (limiter_enabled) {
 563                         float threshold = from_db(limiter_threshold_dbfs);
 564                         float ratio = 30.0f;
 565                         float attack_time = 0.0f;  // Instant.
 566                         float release_time = 0.020f;
 567                         float makeup_gain = 1.0f;  // 0 dB.
 568                         limiter.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
 569         //              limiter_att = limiter.get_attenuation();
 570                 }
 571
 572         //      printf("limiter=%+5.1f  compressor=%+5.1f\n", to_db(limiter_att), to_db(compressor_att));
 573         }
 574
 575         // At this point, we are most likely close to +0 LU (at least if the
 576         // faders sum to 0 dB and the compressors are on), but all of our
 577         // measurements have been on raw sample values, not R128 values.
 578         // So we have a final makeup gain to get us to +0 LU; the gain
 579         // adjustments required should be relatively small, and also, the
 580         // offset shouldn't change much (only if the type of audio changes
 581         // significantly). Thus, we shoot for updating this value basically
 582         // “whenever we process buffers”, since the R128 calculation isn't exactly
 583         // something we get out per-sample.
 584         //
 585         // Note that there's a feedback loop here, so we choose a very slow filter
 586         // (half-time of 30 seconds).
 587         double target_loudness_factor, alpha;
 588         double loudness_lu = r128.loudness_M() - ref_level_lufs;
 589         target_loudness_factor = final_makeup_gain * from_db(-loudness_lu);
 590
 591         // If we're outside +/- 5 LU (after correction), we don't count it as
 592         // a normal signal (probably silence) and don't change the
 593         // correction factor; just apply what we already have.
 594         if (fabs(loudness_lu) >= 5.0 || !final_makeup_gain_auto) {
 595                 alpha = 0.0;
 596         } else {
 597                 // Formula adapted from
 598                 // https://en.wikipedia.org/wiki/Low-pass_filter#Simple_infinite_impulse_response_filter.
 599                 const double half_time_s = 30.0;
 600                 const double fc_mul_2pi_delta_t = 1.0 / (half_time_s * OUTPUT_FREQUENCY);
 601                 alpha = fc_mul_2pi_delta_t / (fc_mul_2pi_delta_t + 1.0);
 602         }
 603
 604         {
 605                 lock_guard<mutex> lock(compressor_mutex);
 606                 double m = final_makeup_gain;
 607                 for (size_t i = 0; i < samples_out.size(); i += 2) {
 608                         samples_out[i + 0] *= m;
 609                         samples_out[i + 1] *= m;
 610                         m += (target_loudness_factor - m) * alpha;
 611                 }
 612                 final_makeup_gain = m;
 613         }
 614
 615         update_meters(samples_out);
 616
 617         return samples_out;
 618 }
 619
 620 namespace {
 621
 622 void apply_filter_fade(StereoFilter *filter, float *data, unsigned num_samples, float cutoff_hz, float db, float last_db)
 623 {
 624         // A granularity of 32 samples is an okay tradeoff between speed and
 625         // smoothness; recalculating the filters is pretty expensive, so it's
 626         // good that we don't do this all the time.
 627         static constexpr unsigned filter_granularity_samples = 32;
 628
 629         const float cutoff_linear = cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY;
 630         if (fabs(db - last_db) < 1e-3) {
 631                 // Constant over this frame.
 632                 if (fabs(db) > 0.01f) {
 633                         filter->render(data, num_samples, cutoff_linear, 0.5f, db / 40.0f);
 634                 }
 635         } else {
 636                 // We need to do a fade. (Rounding up avoids division by zero.)
 637                 unsigned num_blocks = (num_samples + filter_granularity_samples - 1) / filter_granularity_samples;
 638                 const float inc_db_norm = (db - last_db) / 40.0f / num_blocks;
 639                 float db_norm = db / 40.0f;
 640                 for (size_t i = 0; i < num_samples; i += filter_granularity_samples) {
 641                         size_t samples_this_block = std::min<size_t>(num_samples - i, filter_granularity_samples);
 642                         filter->render(data + i * 2, samples_this_block, cutoff_linear, 0.5f, db_norm);
 643                         db_norm += inc_db_norm;
 644                 }
 645         }
 646 }
 647
 648 }  // namespace
 649
 650 void AudioMixer::apply_eq(unsigned bus_index, vector<float> *samples_bus)
 651 {
 652         constexpr float bass_freq_hz = 200.0f;
 653         constexpr float treble_freq_hz = 4700.0f;
 654
 655         // Cut away everything under 120 Hz (or whatever the cutoff is);
 656         // we don't need it for voice, and it will reduce headroom
 657         // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
 658         // should be dampened.)
 659         if (locut_enabled[bus_index]) {
 660                 locut[bus_index].render(samples_bus->data(), samples_bus->size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
 661         }
 662
 663         // Apply the rest of the EQ. Since we only have a simple three-band EQ,
 664         // we can implement it with two shelf filters. We use a simple gain to
 665         // set the mid-level filter, and then offset the low and high bands
 666         // from that if we need to. (We could perhaps have folded the gain into
 667         // the next part, but it's so cheap that the trouble isn't worth it.)
 668         //
 669         // If any part of the EQ has changed appreciably since last frame,
 670         // we fade smoothly during the course of this frame.
 671         const float bass_db = eq_level_db[bus_index][EQ_BAND_BASS];
 672         const float mid_db = eq_level_db[bus_index][EQ_BAND_MID];
 673         const float treble_db = eq_level_db[bus_index][EQ_BAND_TREBLE];
 674
 675         const float last_bass_db = last_eq_level_db[bus_index][EQ_BAND_BASS];
 676         const float last_mid_db = last_eq_level_db[bus_index][EQ_BAND_MID];
 677         const float last_treble_db = last_eq_level_db[bus_index][EQ_BAND_TREBLE];
 678
 679         assert(samples_bus->size() % 2 == 0);
 680         const unsigned num_samples = samples_bus->size() / 2;
 681
 682         apply_gain(mid_db, last_mid_db, samples_bus);
 683
 684         apply_filter_fade(&eq[bus_index][EQ_BAND_BASS], samples_bus->data(), num_samples, bass_freq_hz, bass_db - mid_db, last_bass_db - last_mid_db);
 685         apply_filter_fade(&eq[bus_index][EQ_BAND_TREBLE], samples_bus->data(), num_samples, treble_freq_hz, treble_db - mid_db, last_treble_db - last_mid_db);
 686
 687         last_eq_level_db[bus_index][EQ_BAND_BASS] = bass_db;
 688         last_eq_level_db[bus_index][EQ_BAND_MID] = mid_db;
 689         last_eq_level_db[bus_index][EQ_BAND_TREBLE] = treble_db;
 690 }
 691
 692 void AudioMixer::add_bus_to_master(unsigned bus_index, const vector<float> &samples_bus, vector<float> *samples_out)
 693 {
 694         assert(samples_bus.size() == samples_out->size());
 695         assert(samples_bus.size() % 2 == 0);
 696         unsigned num_samples = samples_bus.size() / 2;
 697         const float new_volume_db = mute[bus_index] ? -90.0f : fader_volume_db[bus_index].load();
 698         if (fabs(new_volume_db - last_fader_volume_db[bus_index]) > 1e-3) {
 699                 // The volume has changed; do a fade over the course of this frame.
 700                 // (We might have some numerical issues here, but it seems to sound OK.)
 701                 // For the purpose of fading here, the silence floor is set to -90 dB
 702                 // (the fader only goes to -84).
 703                 float old_volume = from_db(max<float>(last_fader_volume_db[bus_index], -90.0f));
 704                 float volume = from_db(max<float>(new_volume_db, -90.0f));
 705
 706                 float volume_inc = pow(volume / old_volume, 1.0 / num_samples);
 707                 volume = old_volume;
 708                 if (bus_index == 0) {
 709                         for (unsigned i = 0; i < num_samples; ++i) {
 710                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 711                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 712                                 volume *= volume_inc;
 713                         }
 714                 } else {
 715                         for (unsigned i = 0; i < num_samples; ++i) {
 716                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 717                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 718                                 volume *= volume_inc;
 719                         }
 720                 }
 721         } else if (new_volume_db > -90.0f) {
 722                 float volume = from_db(new_volume_db);
 723                 if (bus_index == 0) {
 724                         for (unsigned i = 0; i < num_samples; ++i) {
 725                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 726                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 727                         }
 728                 } else {
 729                         for (unsigned i = 0; i < num_samples; ++i) {
 730                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 731                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 732                         }
 733                 }
 734         }
 735
 736         last_fader_volume_db[bus_index] = new_volume_db;
 737 }
 738
 739 void AudioMixer::measure_bus_levels(unsigned bus_index, const vector<float> &left, const vector<float> &right)
 740 {
 741         assert(left.size() == right.size());
 742         const float volume = mute[bus_index] ? 0.0f : from_db(fader_volume_db[bus_index]);
 743         const float peak_levels[2] = {
 744                 find_peak(left.data(), left.size()) * volume,
 745                 find_peak(right.data(), right.size()) * volume
 746         };
 747         for (unsigned channel = 0; channel < 2; ++channel) {
 748                 // Compute the current value, including hold and falloff.
 749                 // The constants are borrowed from zita-mu1 by Fons Adriaensen.
 750                 static constexpr float hold_sec = 0.5f;
 751                 static constexpr float falloff_db_sec = 15.0f;  // dB/sec falloff after hold.
 752                 float current_peak;
 753                 PeakHistory &history = peak_history[bus_index][channel];
 754                 history.historic_peak = max(history.historic_peak, peak_levels[channel]);
 755                 if (history.age_seconds < hold_sec) {
 756                         current_peak = history.last_peak;
 757                 } else {
 758                         current_peak = history.last_peak * from_db(-falloff_db_sec * (history.age_seconds - hold_sec));
 759                 }
 760
 761                 // See if we have a new peak to replace the old (possibly falling) one.
 762                 if (peak_levels[channel] > current_peak) {
 763                         history.last_peak = peak_levels[channel];
 764                         history.age_seconds = 0.0f;  // Not 100% correct, but more than good enough given our frame sizes.
 765                         current_peak = peak_levels[channel];
 766                 } else {
 767                         history.age_seconds += float(left.size()) / OUTPUT_FREQUENCY;
 768                 }
 769                 history.current_level = peak_levels[channel];
 770                 history.current_peak = current_peak;
 771         }
 772 }
 773
 774 void AudioMixer::update_meters(const vector<float> &samples)
 775 {
 776         // Upsample 4x to find interpolated peak.
 777         peak_resampler.inp_data = const_cast<float *>(samples.data());
 778         peak_resampler.inp_count = samples.size() / 2;
 779
 780         vector<float> interpolated_samples;
 781         interpolated_samples.resize(samples.size());
 782         {
 783                 lock_guard<mutex> lock(audio_measure_mutex);
 784
 785                 while (peak_resampler.inp_count > 0) {  // About four iterations.
 786                         peak_resampler.out_data = &interpolated_samples[0];
 787                         peak_resampler.out_count = interpolated_samples.size() / 2;
 788                         peak_resampler.process();
 789                         size_t out_stereo_samples = interpolated_samples.size() / 2 - peak_resampler.out_count;
 790                         peak = max<float>(peak, find_peak(interpolated_samples.data(), out_stereo_samples * 2));
 791                         peak_resampler.out_data = nullptr;
 792                 }
 793         }
 794
 795         // Find R128 levels and L/R correlation.
 796         vector<float> left, right;
 797         deinterleave_samples(samples, &left, &right);
 798         float *ptrs[] = { left.data(), right.data() };
 799         {
 800                 lock_guard<mutex> lock(audio_measure_mutex);
 801                 r128.process(left.size(), ptrs);
 802                 correlation.process_samples(samples);
 803         }
 804
 805         send_audio_level_callback();
 806 }
 807
 808 void AudioMixer::reset_meters()
 809 {
 810         lock_guard<mutex> lock(audio_measure_mutex);
 811         peak_resampler.reset();
 812         peak = 0.0f;
 813         r128.reset();
 814         r128.integr_start();
 815         correlation.reset();
 816 }
 817
 818 void AudioMixer::send_audio_level_callback()
 819 {
 820         if (audio_level_callback == nullptr) {
 821                 return;
 822         }
 823
 824         lock_guard<mutex> lock(audio_measure_mutex);
 825         double loudness_s = r128.loudness_S();
 826         double loudness_i = r128.integrated();
 827         double loudness_range_low = r128.range_min();
 828         double loudness_range_high = r128.range_max();
 829
 830         vector<BusLevel> bus_levels;
 831         bus_levels.resize(input_mapping.buses.size());
 832         {
 833                 lock_guard<mutex> lock(compressor_mutex);
 834                 for (unsigned bus_index = 0; bus_index < bus_levels.size(); ++bus_index) {
 835                         bus_levels[bus_index].current_level_dbfs[0] = to_db(peak_history[bus_index][0].current_level);
 836                         bus_levels[bus_index].current_level_dbfs[1] = to_db(peak_history[bus_index][1].current_level);
 837                         bus_levels[bus_index].peak_level_dbfs[0] = to_db(peak_history[bus_index][0].current_peak);
 838                         bus_levels[bus_index].peak_level_dbfs[1] = to_db(peak_history[bus_index][1].current_peak);
 839                         bus_levels[bus_index].historic_peak_dbfs = to_db(
 840                                 max(peak_history[bus_index][0].historic_peak,
 841                                     peak_history[bus_index][1].historic_peak));
 842                         bus_levels[bus_index].gain_staging_db = gain_staging_db[bus_index];
 843                         if (compressor_enabled[bus_index]) {
 844                                 bus_levels[bus_index].compressor_attenuation_db = -to_db(compressor[bus_index]->get_attenuation());
 845                         } else {
 846                                 bus_levels[bus_index].compressor_attenuation_db = 0.0;
 847                         }
 848                 }
 849         }
 850
 851         audio_level_callback(loudness_s, to_db(peak), bus_levels,
 852                 loudness_i, loudness_range_low, loudness_range_high,
 853                 to_db(final_makeup_gain),
 854                 correlation.get_correlation());
 855 }
 856
 857 map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 858 {
 859         lock_guard<timed_mutex> lock(audio_mutex);
 860
 861         map<DeviceSpec, DeviceInfo> devices;
 862         for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
 863                 const DeviceSpec spec{ InputSourceType::CAPTURE_CARD, card_index };
 864                 const AudioDevice *device = &video_cards[card_index];
 865                 DeviceInfo info;
 866                 info.display_name = device->display_name;
 867                 info.num_channels = 8;
 868                 devices.insert(make_pair(spec, info));
 869         }
 870         vector<ALSAPool::Device> available_alsa_devices = alsa_pool.get_devices();
 871         for (unsigned card_index = 0; card_index < available_alsa_devices.size(); ++card_index) {
 872                 const DeviceSpec spec{ InputSourceType::ALSA_INPUT, card_index };
 873                 const ALSAPool::Device &device = available_alsa_devices[card_index];
 874                 DeviceInfo info;
 875                 info.display_name = device.display_name();
 876                 info.num_channels = device.num_channels;
 877                 info.alsa_name = device.name;
 878                 info.alsa_info = device.info;
 879                 info.alsa_address = device.address;
 880                 devices.insert(make_pair(spec, info));
 881         }
 882         return devices;
 883 }
 884
 885 void AudioMixer::set_display_name(DeviceSpec device_spec, const string &name)
 886 {
 887         AudioDevice *device = find_audio_device(device_spec);
 888
 889         lock_guard<timed_mutex> lock(audio_mutex);
 890         device->display_name = name;
 891 }
 892
 893 void AudioMixer::serialize_device(DeviceSpec device_spec, DeviceSpecProto *device_spec_proto)
 894 {
 895         lock_guard<timed_mutex> lock(audio_mutex);
 896         switch (device_spec.type) {
 897                 case InputSourceType::SILENCE:
 898                         device_spec_proto->set_type(DeviceSpecProto::SILENCE);
 899                         break;
 900                 case InputSourceType::CAPTURE_CARD:
 901                         device_spec_proto->set_type(DeviceSpecProto::CAPTURE_CARD);
 902                         device_spec_proto->set_index(device_spec.index);
 903                         device_spec_proto->set_display_name(video_cards[device_spec.index].display_name);
 904                         break;
 905                 case InputSourceType::ALSA_INPUT:
 906                         alsa_pool.serialize_device(device_spec.index, device_spec_proto);
 907                         break;
 908         }
 909 }
 910
 911 void AudioMixer::set_simple_input(unsigned card_index)
 912 {
 913         InputMapping new_input_mapping;
 914         InputMapping::Bus input;
 915         input.name = "Main";
 916         input.device.type = InputSourceType::CAPTURE_CARD;
 917         input.device.index = card_index;
 918         input.source_channel[0] = 0;
 919         input.source_channel[1] = 1;
 920
 921         new_input_mapping.buses.push_back(input);
 922
 923         lock_guard<timed_mutex> lock(audio_mutex);
 924         current_mapping_mode = MappingMode::SIMPLE;
 925         set_input_mapping_lock_held(new_input_mapping);
 926         fader_volume_db[0] = 0.0f;
 927 }
 928
 929 unsigned AudioMixer::get_simple_input() const
 930 {
 931         lock_guard<timed_mutex> lock(audio_mutex);
 932         if (input_mapping.buses.size() == 1 &&
 933             input_mapping.buses[0].device.type == InputSourceType::CAPTURE_CARD &&
 934             input_mapping.buses[0].source_channel[0] == 0 &&
 935             input_mapping.buses[0].source_channel[1] == 1) {
 936                 return input_mapping.buses[0].device.index;
 937         } else {
 938                 return numeric_limits<unsigned>::max();
 939         }
 940 }
 941
 942 void AudioMixer::set_input_mapping(const InputMapping &new_input_mapping)
 943 {
 944         lock_guard<timed_mutex> lock(audio_mutex);
 945         set_input_mapping_lock_held(new_input_mapping);
 946         current_mapping_mode = MappingMode::MULTICHANNEL;
 947 }
 948
 949 AudioMixer::MappingMode AudioMixer::get_mapping_mode() const
 950 {
 951         lock_guard<timed_mutex> lock(audio_mutex);
 952         return current_mapping_mode;
 953 }
 954
 955 void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mapping)
 956 {
 957         map<DeviceSpec, set<unsigned>> interesting_channels;
 958         for (const InputMapping::Bus &bus : new_input_mapping.buses) {
 959                 if (bus.device.type == InputSourceType::CAPTURE_CARD ||
 960                     bus.device.type == InputSourceType::ALSA_INPUT) {
 961                         for (unsigned channel = 0; channel < 2; ++channel) {
 962                                 if (bus.source_channel[channel] != -1) {
 963                                         interesting_channels[bus.device].insert(bus.source_channel[channel]);
 964                                 }
 965                         }
 966                 }
 967         }
 968
 969         // Reset resamplers for all cards that don't have the exact same state as before.
 970         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 971                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 972                 AudioDevice *device = find_audio_device(device_spec);
 973                 if (device->interesting_channels != interesting_channels[device_spec]) {
 974                         device->interesting_channels = interesting_channels[device_spec];
 975                         reset_resampler_mutex_held(device_spec);
 976                 }
 977         }
 978         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 979                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 980                 AudioDevice *device = find_audio_device(device_spec);
 981                 if (interesting_channels[device_spec].empty()) {
 982                         alsa_pool.release_device(card_index);
 983                 } else {
 984                         alsa_pool.hold_device(card_index);
 985                 }
 986                 if (device->interesting_channels != interesting_channels[device_spec]) {
 987                         device->interesting_channels = interesting_channels[device_spec];
 988                         alsa_pool.reset_device(device_spec.index);
 989                         reset_resampler_mutex_held(device_spec);
 990                 }
 991         }
 992
 993         input_mapping = new_input_mapping;
 994 }
 995
 996 InputMapping AudioMixer::get_input_mapping() const
 997 {
 998         lock_guard<timed_mutex> lock(audio_mutex);
 999         return input_mapping;
1000 }
1001
1002 unsigned AudioMixer::num_buses() const
1003 {
1004         lock_guard<timed_mutex> lock(audio_mutex);
1005         return input_mapping.buses.size();
1006 }
1007
1008 void AudioMixer::reset_peak(unsigned bus_index)
1009 {
1010         lock_guard<timed_mutex> lock(audio_mutex);
1011         for (unsigned channel = 0; channel < 2; ++channel) {
1012                 PeakHistory &history = peak_history[bus_index][channel];
1013                 history.current_level = 0.0f;
1014                 history.historic_peak = 0.0f;
1015                 history.current_peak = 0.0f;
1016                 history.last_peak = 0.0f;
1017                 history.age_seconds = 0.0f;
1018         }
1019 }
1020
// Global pointer to an AudioMixer; starts out as null. Nothing in this part
// of the file assigns it, so it is presumably set up by whoever creates the
// mixer -- TODO confirm against the callers.
AudioMixer *global_audio_mixer = nullptr;