git.sesse.net Git - nageru/blob - audio_mixer.cpp

   1 #include "audio_mixer.h"
   2
   3 #include <assert.h>
   4 #include <endian.h>
   5 #include <bmusb/bmusb.h>
   6 #include <stdio.h>
   7 #include <endian.h>
   8 #include <cmath>
   9 #include <limits>
  10 #ifdef __SSE__
  11 #include <immintrin.h>
  12 #endif
  13
  14 #include "db.h"
  15 #include "flags.h"
  16 #include "mixer.h"
  17 #include "state.pb.h"
  18 #include "timebase.h"
  19
  20 using namespace bmusb;
  21 using namespace std;
  22 using namespace std::placeholders;
  23
  24 namespace {
  25
  26 // TODO: If these prove to be a bottleneck, they can be SSSE3-optimized
  27 // (usually including multiple channels at a time).
  28
  29 void convert_fixed16_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  30                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  31                              size_t num_samples)
  32 {
  33         assert(in_channel < in_num_channels);
  34         assert(out_channel < out_num_channels);
  35         src += in_channel * 2;
  36         dst += out_channel;
  37
  38         for (size_t i = 0; i < num_samples; ++i) {
  39                 int16_t s = le16toh(*(int16_t *)src);
  40                 *dst = s * (1.0f / 32768.0f);
  41
  42                 src += 2 * in_num_channels;
  43                 dst += out_num_channels;
  44         }
  45 }
  46
  47 void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  48                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  49                              size_t num_samples)
  50 {
  51         assert(in_channel < in_num_channels);
  52         assert(out_channel < out_num_channels);
  53         src += in_channel * 3;
  54         dst += out_channel;
  55
  56         for (size_t i = 0; i < num_samples; ++i) {
  57                 uint32_t s1 = src[0];
  58                 uint32_t s2 = src[1];
  59                 uint32_t s3 = src[2];
  60                 uint32_t s = s1 | (s1 << 8) | (s2 << 16) | (s3 << 24);
  61                 *dst = int(s) * (1.0f / 2147483648.0f);
  62
  63                 src += 3 * in_num_channels;
  64                 dst += out_num_channels;
  65         }
  66 }
  67
  68 void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
  69                              const uint8_t *src, size_t in_channel, size_t in_num_channels,
  70                              size_t num_samples)
  71 {
  72         assert(in_channel < in_num_channels);
  73         assert(out_channel < out_num_channels);
  74         src += in_channel * 4;
  75         dst += out_channel;
  76
  77         for (size_t i = 0; i < num_samples; ++i) {
  78                 int32_t s = le32toh(*(int32_t *)src);
  79                 *dst = s * (1.0f / 2147483648.0f);
  80
  81                 src += 4 * in_num_channels;
  82                 dst += out_num_channels;
  83         }
  84 }
  85
  86 float find_peak_plain(const float *samples, size_t num_samples) __attribute__((unused));
  87
  88 float find_peak_plain(const float *samples, size_t num_samples)
  89 {
  90         float m = fabs(samples[0]);
  91         for (size_t i = 1; i < num_samples; ++i) {
  92                 m = max(m, fabs(samples[i]));
  93         }
  94         return m;
  95 }
  96
  97 #ifdef __SSE__
  98 static inline float horizontal_max(__m128 m)
  99 {
 100         __m128 tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 0, 3, 2));
 101         m = _mm_max_ps(m, tmp);
 102         tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 3, 0, 1));
 103         m = _mm_max_ps(m, tmp);
 104         return _mm_cvtss_f32(m);
 105 }
 106
 107 float find_peak(const float *samples, size_t num_samples)
 108 {
 109         const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu));
 110         __m128 m = _mm_setzero_ps();
 111         for (size_t i = 0; i < (num_samples & ~3); i += 4) {
 112                 __m128 x = _mm_loadu_ps(samples + i);
 113                 x = _mm_and_ps(x, abs_mask);
 114                 m = _mm_max_ps(m, x);
 115         }
 116         float result = horizontal_max(m);
 117
 118         for (size_t i = (num_samples & ~3); i < num_samples; ++i) {
 119                 result = max(result, fabs(samples[i]));
 120         }
 121
 122 #if 0
 123         // Self-test. We should be bit-exact the same.
 124         float reference_result = find_peak_plain(samples, num_samples);
 125         if (result != reference_result) {
 126                 fprintf(stderr, "Error: Peak is %f [%f %f %f %f]; should be %f.\n",
 127                         result,
 128                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(0, 0, 0, 0))),
 129                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))),
 130                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))),
 131                         _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))),
 132                         reference_result);
 133                 abort();
 134         }
 135 #endif
 136         return result;
 137 }
 138 #else
 139 float find_peak(const float *samples, size_t num_samples)
 140 {
 141         return find_peak_plain(samples, num_samples);
 142 }
 143 #endif
 144
 145 void deinterleave_samples(const vector<float> &in, vector<float> *out_l, vector<float> *out_r)
 146 {
 147         size_t num_samples = in.size() / 2;
 148         out_l->resize(num_samples);
 149         out_r->resize(num_samples);
 150
 151         const float *inptr = in.data();
 152         float *lptr = &(*out_l)[0];
 153         float *rptr = &(*out_r)[0];
 154         for (size_t i = 0; i < num_samples; ++i) {
 155                 *lptr++ = *inptr++;
 156                 *rptr++ = *inptr++;
 157         }
 158 }
 159
 160 }  // namespace
 161
// Sets up the mixer: registers this instance in global_audio_mixer,
// initializes the per-bus filters and compressors with default bus settings,
// applies the limiter/makeup-gain flags from global_flags, initializes the
// ALSA pool, installs the initial input mapping, and prepares the R128
// meter and the peak resampler.
//
// Exits the process if an input mapping file was requested in global_flags
// but could not be loaded.
AudioMixer::AudioMixer(unsigned num_cards)
        : num_cards(num_cards),
          limiter(OUTPUT_FREQUENCY),
          correlation(OUTPUT_FREQUENCY)
{
        global_audio_mixer = this;

        // Per-bus processing chain state.
        for (unsigned bus_index = 0; bus_index < MAX_BUSES; ++bus_index) {
                locut[bus_index].init(FILTER_HPF, 2);
                eq[bus_index][EQ_BAND_BASS].init(FILTER_LOW_SHELF, 1);
                // Note: EQ_BAND_MID isn't used (see comments in apply_eq()).
                eq[bus_index][EQ_BAND_TREBLE].init(FILTER_HIGH_SHELF, 1);
                compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));
                level_compressor[bus_index].reset(new StereoCompressor(OUTPUT_FREQUENCY));

                set_bus_settings(bus_index, get_default_bus_settings());
        }
        set_limiter_enabled(global_flags.limiter_enabled);
        set_final_makeup_gain_auto(global_flags.final_makeup_gain_auto);
        alsa_pool.init();

        // Initial input mapping: either loaded from the file named in the
        // flags (which implies multichannel mode), or a simple mapping of
        // card 0, optionally switched to multichannel mode by flag.
        if (!global_flags.input_mapping_filename.empty()) {
                current_mapping_mode = MappingMode::MULTICHANNEL;
                InputMapping new_input_mapping;
                if (!load_input_mapping_from_file(get_devices(),
                                                  global_flags.input_mapping_filename,
                                                  &new_input_mapping)) {
                        fprintf(stderr, "Failed to load input mapping from '%s', exiting.\n",
                                global_flags.input_mapping_filename.c_str());
                        exit(1);
                }
                set_input_mapping(new_input_mapping);
        } else {
                set_simple_input(/*card_index=*/0);
                if (global_flags.multichannel_mapping_mode) {
                        current_mapping_mode = MappingMode::MULTICHANNEL;
                }
        }

        // Two-channel loudness meter at the output rate; start integrating
        // right away.
        r128.init(2, OUTPUT_FREQUENCY);
        r128.integr_start();

        // hlen=16 is pretty low quality, but we use quite a bit of CPU otherwise,
        // and there's a limit to how important the peak meter is.
        peak_resampler.setup(OUTPUT_FREQUENCY, OUTPUT_FREQUENCY * 4, /*num_channels=*/2, /*hlen=*/16, /*frel=*/1.0);
}
 208
 209 void AudioMixer::reset_resampler(DeviceSpec device_spec)
 210 {
 211         lock_guard<timed_mutex> lock(audio_mutex);
 212         reset_resampler_mutex_held(device_spec);
 213 }
 214
 215 void AudioMixer::reset_resampler_mutex_held(DeviceSpec device_spec)
 216 {
 217         AudioDevice *device = find_audio_device(device_spec);
 218
 219         if (device->interesting_channels.empty()) {
 220                 device->resampling_queue.reset();
 221         } else {
 222                 // TODO: ResamplingQueue should probably take the full device spec.
 223                 // (It's only used for console output, though.)
 224                 device->resampling_queue.reset(new ResamplingQueue(device_spec.index, device->capture_frequency, OUTPUT_FREQUENCY, device->interesting_channels.size()));
 225         }
 226         device->next_local_pts = 0;
 227 }
 228
 229 bool AudioMixer::add_audio(DeviceSpec device_spec, const uint8_t *data, unsigned num_samples, AudioFormat audio_format, int64_t frame_length)
 230 {
 231         AudioDevice *device = find_audio_device(device_spec);
 232
 233         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 234         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 235                 return false;
 236         }
 237         if (device->resampling_queue == nullptr) {
 238                 // No buses use this device; throw it away.
 239                 return true;
 240         }
 241
 242         unsigned num_channels = device->interesting_channels.size();
 243         assert(num_channels > 0);
 244
 245         // Convert the audio to fp32.
 246         unique_ptr<float[]> audio(new float[num_samples * num_channels]);
 247         unsigned channel_index = 0;
 248         for (auto channel_it = device->interesting_channels.cbegin(); channel_it != device->interesting_channels.end(); ++channel_it, ++channel_index) {
 249                 switch (audio_format.bits_per_sample) {
 250                 case 0:
 251                         assert(num_samples == 0);
 252                         break;
 253                 case 16:
 254                         convert_fixed16_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 255                         break;
 256                 case 24:
 257                         convert_fixed24_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 258                         break;
 259                 case 32:
 260                         convert_fixed32_to_fp32(audio.get(), channel_index, num_channels, data, *channel_it, audio_format.num_channels, num_samples);
 261                         break;
 262                 default:
 263                         fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
 264                         assert(false);
 265                 }
 266         }
 267
 268         // Now add it.
 269         int64_t local_pts = device->next_local_pts;
 270         device->resampling_queue->add_input_samples(local_pts / double(TIMEBASE), audio.get(), num_samples);
 271         device->next_local_pts = local_pts + frame_length;
 272         return true;
 273 }
 274
 275 bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames, int64_t frame_length)
 276 {
 277         AudioDevice *device = find_audio_device(device_spec);
 278
 279         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 280         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 281                 return false;
 282         }
 283         if (device->resampling_queue == nullptr) {
 284                 // No buses use this device; throw it away.
 285                 return true;
 286         }
 287
 288         unsigned num_channels = device->interesting_channels.size();
 289         assert(num_channels > 0);
 290
 291         vector<float> silence(samples_per_frame * num_channels, 0.0f);
 292         for (unsigned i = 0; i < num_frames; ++i) {
 293                 device->resampling_queue->add_input_samples(device->next_local_pts / double(TIMEBASE), silence.data(), samples_per_frame);
 294                 // Note that if the format changed in the meantime, we have
 295                 // no way of detecting that; we just have to assume the frame length
 296                 // is always the same.
 297                 device->next_local_pts += frame_length;
 298         }
 299         return true;
 300 }
 301
 302 bool AudioMixer::silence_card(DeviceSpec device_spec, bool silence)
 303 {
 304         AudioDevice *device = find_audio_device(device_spec);
 305
 306         unique_lock<timed_mutex> lock(audio_mutex, defer_lock);
 307         if (!lock.try_lock_for(chrono::milliseconds(10))) {
 308                 return false;
 309         }
 310
 311         if (device->silenced && !silence) {
 312                 reset_resampler_mutex_held(device_spec);
 313         }
 314         device->silenced = silence;
 315         return true;
 316 }
 317
 318 AudioMixer::BusSettings AudioMixer::get_default_bus_settings()
 319 {
 320         BusSettings settings;
 321         settings.fader_volume_db = 0.0f;
 322         settings.locut_enabled = global_flags.locut_enabled;
 323         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 324                 settings.eq_level_db[band_index] = 0.0f;
 325         }
 326         settings.gain_staging_db = global_flags.initial_gain_staging_db;
 327         settings.level_compressor_enabled = global_flags.gain_staging_auto;
 328         settings.compressor_threshold_dbfs = ref_level_dbfs - 12.0f;  // -12 dB.
 329         settings.compressor_enabled = global_flags.compressor_enabled;
 330         return settings;
 331 }
 332
 333 AudioMixer::BusSettings AudioMixer::get_bus_settings(unsigned bus_index) const
 334 {
 335         lock_guard<timed_mutex> lock(audio_mutex);
 336         BusSettings settings;
 337         settings.fader_volume_db = fader_volume_db[bus_index];
 338         settings.locut_enabled = locut_enabled[bus_index];
 339         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 340                 settings.eq_level_db[band_index] = eq_level_db[bus_index][band_index];
 341         }
 342         settings.gain_staging_db = gain_staging_db[bus_index];
 343         settings.level_compressor_enabled = level_compressor_enabled[bus_index];
 344         settings.compressor_threshold_dbfs = compressor_threshold_dbfs[bus_index];
 345         settings.compressor_enabled = compressor_enabled[bus_index];
 346         return settings;
 347 }
 348
 349 void AudioMixer::set_bus_settings(unsigned bus_index, const AudioMixer::BusSettings &settings)
 350 {
 351         lock_guard<timed_mutex> lock(audio_mutex);
 352         fader_volume_db[bus_index] = settings.fader_volume_db;
 353         locut_enabled[bus_index] = settings.locut_enabled;
 354         for (unsigned band_index = 0; band_index < NUM_EQ_BANDS; ++band_index) {
 355                 eq_level_db[bus_index][band_index] = settings.eq_level_db[band_index];
 356         }
 357         gain_staging_db[bus_index] = settings.gain_staging_db;
 358         level_compressor_enabled[bus_index] = settings.level_compressor_enabled;
 359         compressor_threshold_dbfs[bus_index] = settings.compressor_threshold_dbfs;
 360         compressor_enabled[bus_index] = settings.compressor_enabled;
 361 }
 362
 363 AudioMixer::AudioDevice *AudioMixer::find_audio_device(DeviceSpec device)
 364 {
 365         switch (device.type) {
 366         case InputSourceType::CAPTURE_CARD:
 367                 return &video_cards[device.index];
 368         case InputSourceType::ALSA_INPUT:
 369                 return &alsa_inputs[device.index];
 370         case InputSourceType::SILENCE:
 371         default:
 372                 assert(false);
 373         }
 374         return nullptr;
 375 }
 376
 377 // Get a pointer to the given channel from the given device.
 378 // The channel must be picked out earlier and resampled.
 379 void AudioMixer::find_sample_src_from_device(const map<DeviceSpec, vector<float>> &samples_card, DeviceSpec device_spec, int source_channel, const float **srcptr, unsigned *stride)
 380 {
 381         static float zero = 0.0f;
 382         if (source_channel == -1 || device_spec.type == InputSourceType::SILENCE) {
 383                 *srcptr = &zero;
 384                 *stride = 0;
 385                 return;
 386         }
 387         AudioDevice *device = find_audio_device(device_spec);
 388         assert(device->interesting_channels.count(source_channel) != 0);
 389         unsigned channel_index = 0;
 390         for (int channel : device->interesting_channels) {
 391                 if (channel == source_channel) break;
 392                 ++channel_index;
 393         }
 394         assert(channel_index < device->interesting_channels.size());
 395         const auto it = samples_card.find(device_spec);
 396         assert(it != samples_card.end());
 397         *srcptr = &(it->second)[channel_index];
 398         *stride = device->interesting_channels.size();
 399 }
 400
 401 // TODO: Can be SSSE3-optimized if need be.
 402 void AudioMixer::fill_audio_bus(const map<DeviceSpec, vector<float>> &samples_card, const InputMapping::Bus &bus, unsigned num_samples, float *output)
 403 {
 404         if (bus.device.type == InputSourceType::SILENCE) {
 405                 memset(output, 0, num_samples * sizeof(*output));
 406         } else {
 407                 assert(bus.device.type == InputSourceType::CAPTURE_CARD ||
 408                        bus.device.type == InputSourceType::ALSA_INPUT);
 409                 const float *lsrc, *rsrc;
 410                 unsigned lstride, rstride;
 411                 float *dptr = output;
 412                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[0], &lsrc, &lstride);
 413                 find_sample_src_from_device(samples_card, bus.device, bus.source_channel[1], &rsrc, &rstride);
 414                 for (unsigned i = 0; i < num_samples; ++i) {
 415                         *dptr++ = *lsrc;
 416                         *dptr++ = *rsrc;
 417                         lsrc += lstride;
 418                         rsrc += rstride;
 419                 }
 420         }
 421 }
 422
 423 vector<DeviceSpec> AudioMixer::get_active_devices() const
 424 {
 425         vector<DeviceSpec> ret;
 426         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 427                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 428                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 429                         ret.push_back(device_spec);
 430                 }
 431         }
 432         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 433                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 434                 if (!find_audio_device(device_spec)->interesting_channels.empty()) {
 435                         ret.push_back(device_spec);
 436                 }
 437         }
 438         return ret;
 439 }
 440
// Produces num_samples frames of interleaved stereo output (2 * num_samples
// floats) for the given pts.
//
// The steps, in order: pull resampled audio for every active device (or
// zeros if the device is silenced); for each bus in the input mapping, pick
// out its channels, run EQ, level compressor / gain staging and compressor,
// then hand it to add_bus_to_master() and measure_bus_levels(); run the
// limiter on the master; apply the slowly adapting final makeup gain; and
// finally pass the result to update_meters().
//
// audio_mutex is held for the entire call. compressor_mutex is additionally
// taken around the sections that use the compressors, the limiter and
// final_makeup_gain.
vector<float> AudioMixer::get_output(double pts, unsigned num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy)
{
        map<DeviceSpec, vector<float>> samples_card;
        vector<float> samples_bus;

        lock_guard<timed_mutex> lock(audio_mutex);

        // Pick out all the interesting channels from all the cards.
        for (const DeviceSpec &device_spec : get_active_devices()) {
                AudioDevice *device = find_audio_device(device_spec);
                // One float per sample per interesting channel, interleaved.
                samples_card[device_spec].resize(num_samples * device->interesting_channels.size());
                if (device->silenced) {
                        memset(&samples_card[device_spec][0], 0, samples_card[device_spec].size() * sizeof(float));
                } else {
                        device->resampling_queue->get_output_samples(
                                pts,
                                &samples_card[device_spec][0],
                                num_samples,
                                rate_adjustment_policy);
                }
        }

        // samples_bus is a scratch buffer reused for every bus; samples_out
        // is the master that is eventually returned.
        vector<float> samples_out, left, right;
        samples_out.resize(num_samples * 2);
        samples_bus.resize(num_samples * 2);
        for (unsigned bus_index = 0; bus_index < input_mapping.buses.size(); ++bus_index) {
                fill_audio_bus(samples_card, input_mapping.buses[bus_index], num_samples, &samples_bus[0]);
                apply_eq(bus_index, &samples_bus);

                {
                        lock_guard<mutex> lock(compressor_mutex);

                        // Apply a level compressor to get the general level right.
                        // Basically, if it's over about -40 dBFS, we squeeze it down to that level
                        // (or more precisely, near it, since we don't use infinite ratio),
                        // then apply a makeup gain to get it to -14 dBFS. -14 dBFS is, of course,
                        // entirely arbitrary, but from practical tests with speech, it seems to
                        // put it around -23 LUFS, so it's a reasonable starting point for later use.
                        if (level_compressor_enabled[bus_index]) {
                                float threshold = 0.01f;   // -40 dBFS.
                                float ratio = 20.0f;
                                float attack_time = 0.5f;
                                float release_time = 20.0f;
                                float makeup_gain = from_db(ref_level_dbfs - (-40.0f));  // +26 dB.
                                level_compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
                                // Remember the gain that was effectively applied, so that the
                                // else branch below can reuse it when the level compressor is off.
                                gain_staging_db[bus_index] = to_db(level_compressor[bus_index]->get_attenuation() * makeup_gain);
                        } else {
                                // Just apply the gain we already had.
                                float g = from_db(gain_staging_db[bus_index]);
                                for (size_t i = 0; i < samples_bus.size(); ++i) {
                                        samples_bus[i] *= g;
                                }
                        }

#if 0
                        printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
                                level_compressor.get_level(), to_db(level_compressor.get_level()),
                                level_compressor.get_attenuation(), to_db(level_compressor.get_attenuation()),
                                to_db(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
#endif

                        // The real compressor.
                        if (compressor_enabled[bus_index]) {
                                float threshold = from_db(compressor_threshold_dbfs[bus_index]);
                                float ratio = 20.0f;
                                float attack_time = 0.005f;
                                float release_time = 0.040f;
                                float makeup_gain = 2.0f;  // +6 dB.
                                compressor[bus_index]->process(samples_bus.data(), samples_bus.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
                //              compressor_att = compressor.get_attenuation();
                        }
                }

                // Mix the processed bus into the master, then meter the bus
                // on its separated left/right channels.
                add_bus_to_master(bus_index, samples_bus, &samples_out);
                deinterleave_samples(samples_bus, &left, &right);
                measure_bus_levels(bus_index, left, right);
        }

        {
                lock_guard<mutex> lock(compressor_mutex);

                // Finally a limiter at -4 dB (so, -10 dBFS) to take out the worst peaks only.
                // Note that since ratio is not infinite, we could go slightly higher than this.
                if (limiter_enabled) {
                        float threshold = from_db(limiter_threshold_dbfs);
                        float ratio = 30.0f;
                        float attack_time = 0.0f;  // Instant.
                        float release_time = 0.020f;
                        float makeup_gain = 1.0f;  // 0 dB.
                        limiter.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
        //              limiter_att = limiter.get_attenuation();
                }

        //      printf("limiter=%+5.1f  compressor=%+5.1f\n", to_db(limiter_att), to_db(compressor_att));
        }

        // At this point, we are most likely close to +0 LU (at least if the
        // faders sum to 0 dB and the compressors are on), but all of our
        // measurements have been on raw sample values, not R128 values.
        // So we have a final makeup gain to get us to +0 LU; the gain
        // adjustments required should be relatively small, and also, the
        // offset shouldn't change much (only if the type of audio changes
        // significantly). Thus, we shoot for updating this value basically
        // “whenever we process buffers”, since the R128 calculation isn't exactly
        // something we get out per-sample.
        //
        // Note that there's a feedback loop here, so we choose a very slow filter
        // (half-time of 30 seconds).
        double target_loudness_factor, alpha;
        double loudness_lu = r128.loudness_M() - ref_level_lufs;
        double current_makeup_lu = to_db(final_makeup_gain);
        target_loudness_factor = final_makeup_gain * from_db(-loudness_lu);

        // If we're outside +/- 5 LU uncorrected, we don't count it as
        // a normal signal (probably silence) and don't change the
        // correction factor; just apply what we already have.
        if (fabs(loudness_lu - current_makeup_lu) >= 5.0 || !final_makeup_gain_auto) {
                // alpha == 0 leaves the makeup gain unchanged in the loop below.
                alpha = 0.0;
        } else {
                // Formula adapted from
                // https://en.wikipedia.org/wiki/Low-pass_filter#Simple_infinite_impulse_response_filter.
                const double half_time_s = 30.0;
                const double fc_mul_2pi_delta_t = 1.0 / (half_time_s * OUTPUT_FREQUENCY);
                alpha = fc_mul_2pi_delta_t / (fc_mul_2pi_delta_t + 1.0);
        }

        {
                lock_guard<mutex> lock(compressor_mutex);
                // Apply the makeup gain to both channels of each frame while
                // moving it a fraction alpha towards the target per frame;
                // the final value is stored for the next call.
                double m = final_makeup_gain;
                for (size_t i = 0; i < samples_out.size(); i += 2) {
                        samples_out[i + 0] *= m;
                        samples_out[i + 1] *= m;
                        m += (target_loudness_factor - m) * alpha;
                }
                final_makeup_gain = m;
        }

        update_meters(samples_out);

        return samples_out;
}
 582
 583 namespace {
 584
 585 void apply_filter_fade(StereoFilter *filter, float *data, unsigned num_samples, float cutoff_hz, float db, float last_db)
 586 {
 587         // A granularity of 32 samples is an okay tradeoff between speed and
 588         // smoothness; recalculating the filters is pretty expensive, so it's
 589         // good that we don't do this all the time.
 590         static constexpr unsigned filter_granularity_samples = 32;
 591
 592         const float cutoff_linear = cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY;
 593         if (fabs(db - last_db) < 1e-3) {
 594                 // Constant over this frame.
 595                 if (fabs(db) > 0.01f) {
 596                         filter->render(data, num_samples, cutoff_linear, 0.5f, db / 40.0f);
 597                 }
 598         } else {
 599                 // We need to do a fade. (Rounding up avoids division by zero.)
 600                 unsigned num_blocks = (num_samples + filter_granularity_samples - 1) / filter_granularity_samples;
 601                 const float inc_db_norm = (db - last_db) / 40.0f / num_blocks;
 602                 float db_norm = db / 40.0f;
 603                 for (size_t i = 0; i < num_samples; i += filter_granularity_samples) {
 604                         size_t samples_this_block = std::min<size_t>(num_samples - i, filter_granularity_samples);
 605                         filter->render(data + i * 2, samples_this_block, cutoff_linear, 0.5f, db_norm);
 606                         db_norm += inc_db_norm;
 607                 }
 608         }
 609 }
 610
 611 }  // namespace
 612
 613 void AudioMixer::apply_eq(unsigned bus_index, vector<float> *samples_bus)
 614 {
 615         constexpr float bass_freq_hz = 200.0f;
 616         constexpr float treble_freq_hz = 4700.0f;
 617
 618         // Cut away everything under 120 Hz (or whatever the cutoff is);
 619         // we don't need it for voice, and it will reduce headroom
 620         // and confuse the compressor. (In particular, any hums at 50 or 60 Hz
 621         // should be dampened.)
 622         if (locut_enabled[bus_index]) {
 623                 locut[bus_index].render(samples_bus->data(), samples_bus->size() / 2, locut_cutoff_hz * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
 624         }
 625
 626         // Apply the rest of the EQ. Since we only have a simple three-band EQ,
 627         // we can implement it with two shelf filters. We use a simple gain to
 628         // set the mid-level filter, and then offset the low and high bands
 629         // from that if we need to. (We could perhaps have folded the gain into
 630         // the next part, but it's so cheap that the trouble isn't worth it.)
 631         //
 632         // If any part of the EQ has changed appreciably since last frame,
 633         // we fade smoothly during the course of this frame.
 634         const float bass_db = eq_level_db[bus_index][EQ_BAND_BASS];
 635         const float mid_db = eq_level_db[bus_index][EQ_BAND_MID];
 636         const float treble_db = eq_level_db[bus_index][EQ_BAND_TREBLE];
 637
 638         const float last_bass_db = last_eq_level_db[bus_index][EQ_BAND_BASS];
 639         const float last_mid_db = last_eq_level_db[bus_index][EQ_BAND_MID];
 640         const float last_treble_db = last_eq_level_db[bus_index][EQ_BAND_TREBLE];
 641
 642         assert(samples_bus->size() % 2 == 0);
 643         const unsigned num_samples = samples_bus->size() / 2;
 644
 645         if (fabs(mid_db - last_mid_db) < 1e-3) {
 646                 // Constant over this frame.
 647                 const float gain = from_db(mid_db);
 648                 for (size_t i = 0; i < samples_bus->size(); ++i) {
 649                         (*samples_bus)[i] *= gain;
 650                 }
 651         } else {
 652                 // We need to do a fade.
 653                 float gain = from_db(last_mid_db);
 654                 const float gain_inc = pow(from_db(mid_db - last_mid_db), 1.0 / num_samples);
 655                 for (size_t i = 0; i < num_samples; ++i) {
 656                         (*samples_bus)[i * 2 + 0] *= gain;
 657                         (*samples_bus)[i * 2 + 1] *= gain;
 658                         gain *= gain_inc;
 659                 }
 660         }
 661
 662         apply_filter_fade(&eq[bus_index][EQ_BAND_BASS], samples_bus->data(), num_samples, bass_freq_hz, bass_db - mid_db, last_bass_db - last_mid_db);
 663         apply_filter_fade(&eq[bus_index][EQ_BAND_TREBLE], samples_bus->data(), num_samples, treble_freq_hz, treble_db - mid_db, last_treble_db - last_mid_db);
 664
 665         last_eq_level_db[bus_index][EQ_BAND_BASS] = bass_db;
 666         last_eq_level_db[bus_index][EQ_BAND_MID] = mid_db;
 667         last_eq_level_db[bus_index][EQ_BAND_TREBLE] = treble_db;
 668 }
 669
 670 void AudioMixer::add_bus_to_master(unsigned bus_index, const vector<float> &samples_bus, vector<float> *samples_out)
 671 {
 672         assert(samples_bus.size() == samples_out->size());
 673         assert(samples_bus.size() % 2 == 0);
 674         unsigned num_samples = samples_bus.size() / 2;
 675         if (fabs(fader_volume_db[bus_index] - last_fader_volume_db[bus_index]) > 1e-3) {
 676                 // The volume has changed; do a fade over the course of this frame.
 677                 // (We might have some numerical issues here, but it seems to sound OK.)
 678                 // For the purpose of fading here, the silence floor is set to -90 dB
 679                 // (the fader only goes to -84).
 680                 float old_volume = from_db(max<float>(last_fader_volume_db[bus_index], -90.0f));
 681                 float volume = from_db(max<float>(fader_volume_db[bus_index], -90.0f));
 682
 683                 float volume_inc = pow(volume / old_volume, 1.0 / num_samples);
 684                 volume = old_volume;
 685                 if (bus_index == 0) {
 686                         for (unsigned i = 0; i < num_samples; ++i) {
 687                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 688                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 689                                 volume *= volume_inc;
 690                         }
 691                 } else {
 692                         for (unsigned i = 0; i < num_samples; ++i) {
 693                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 694                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 695                                 volume *= volume_inc;
 696                         }
 697                 }
 698         } else {
 699                 float volume = from_db(fader_volume_db[bus_index]);
 700                 if (bus_index == 0) {
 701                         for (unsigned i = 0; i < num_samples; ++i) {
 702                                 (*samples_out)[i * 2 + 0] = samples_bus[i * 2 + 0] * volume;
 703                                 (*samples_out)[i * 2 + 1] = samples_bus[i * 2 + 1] * volume;
 704                         }
 705                 } else {
 706                         for (unsigned i = 0; i < num_samples; ++i) {
 707                                 (*samples_out)[i * 2 + 0] += samples_bus[i * 2 + 0] * volume;
 708                                 (*samples_out)[i * 2 + 1] += samples_bus[i * 2 + 1] * volume;
 709                         }
 710                 }
 711         }
 712
 713         last_fader_volume_db[bus_index] = fader_volume_db[bus_index];
 714 }
 715
 716 void AudioMixer::measure_bus_levels(unsigned bus_index, const vector<float> &left, const vector<float> &right)
 717 {
 718         assert(left.size() == right.size());
 719         const float volume = from_db(fader_volume_db[bus_index]);
 720         const float peak_levels[2] = {
 721                 find_peak(left.data(), left.size()) * volume,
 722                 find_peak(right.data(), right.size()) * volume
 723         };
 724         for (unsigned channel = 0; channel < 2; ++channel) {
 725                 // Compute the current value, including hold and falloff.
 726                 // The constants are borrowed from zita-mu1 by Fons Adriaensen.
 727                 static constexpr float hold_sec = 0.5f;
 728                 static constexpr float falloff_db_sec = 15.0f;  // dB/sec falloff after hold.
 729                 float current_peak;
 730                 PeakHistory &history = peak_history[bus_index][channel];
 731                 history.historic_peak = max(history.historic_peak, peak_levels[channel]);
 732                 if (history.age_seconds < hold_sec) {
 733                         current_peak = history.last_peak;
 734                 } else {
 735                         current_peak = history.last_peak * from_db(-falloff_db_sec * (history.age_seconds - hold_sec));
 736                 }
 737
 738                 // See if we have a new peak to replace the old (possibly falling) one.
 739                 if (peak_levels[channel] > current_peak) {
 740                         history.last_peak = peak_levels[channel];
 741                         history.age_seconds = 0.0f;  // Not 100% correct, but more than good enough given our frame sizes.
 742                         current_peak = peak_levels[channel];
 743                 } else {
 744                         history.age_seconds += float(left.size()) / OUTPUT_FREQUENCY;
 745                 }
 746                 history.current_level = peak_levels[channel];
 747                 history.current_peak = current_peak;
 748         }
 749 }
 750
 751 void AudioMixer::update_meters(const vector<float> &samples)
 752 {
 753         // Upsample 4x to find interpolated peak.
 754         peak_resampler.inp_data = const_cast<float *>(samples.data());
 755         peak_resampler.inp_count = samples.size() / 2;
 756
 757         vector<float> interpolated_samples;
 758         interpolated_samples.resize(samples.size());
 759         {
 760                 lock_guard<mutex> lock(audio_measure_mutex);
 761
 762                 while (peak_resampler.inp_count > 0) {  // About four iterations.
 763                         peak_resampler.out_data = &interpolated_samples[0];
 764                         peak_resampler.out_count = interpolated_samples.size() / 2;
 765                         peak_resampler.process();
 766                         size_t out_stereo_samples = interpolated_samples.size() / 2 - peak_resampler.out_count;
 767                         peak = max<float>(peak, find_peak(interpolated_samples.data(), out_stereo_samples * 2));
 768                         peak_resampler.out_data = nullptr;
 769                 }
 770         }
 771
 772         // Find R128 levels and L/R correlation.
 773         vector<float> left, right;
 774         deinterleave_samples(samples, &left, &right);
 775         float *ptrs[] = { left.data(), right.data() };
 776         {
 777                 lock_guard<mutex> lock(audio_measure_mutex);
 778                 r128.process(left.size(), ptrs);
 779                 correlation.process_samples(samples);
 780         }
 781
 782         send_audio_level_callback();
 783 }
 784
 785 void AudioMixer::reset_meters()
 786 {
 787         lock_guard<mutex> lock(audio_measure_mutex);
 788         peak_resampler.reset();
 789         peak = 0.0f;
 790         r128.reset();
 791         r128.integr_start();
 792         correlation.reset();
 793 }
 794
 795 void AudioMixer::send_audio_level_callback()
 796 {
 797         if (audio_level_callback == nullptr) {
 798                 return;
 799         }
 800
 801         lock_guard<mutex> lock(audio_measure_mutex);
 802         double loudness_s = r128.loudness_S();
 803         double loudness_i = r128.integrated();
 804         double loudness_range_low = r128.range_min();
 805         double loudness_range_high = r128.range_max();
 806
 807         vector<BusLevel> bus_levels;
 808         bus_levels.resize(input_mapping.buses.size());
 809         {
 810                 lock_guard<mutex> lock(compressor_mutex);
 811                 for (unsigned bus_index = 0; bus_index < bus_levels.size(); ++bus_index) {
 812                         bus_levels[bus_index].current_level_dbfs[0] = to_db(peak_history[bus_index][0].current_level);
 813                         bus_levels[bus_index].current_level_dbfs[1] = to_db(peak_history[bus_index][1].current_level);
 814                         bus_levels[bus_index].peak_level_dbfs[0] = to_db(peak_history[bus_index][0].current_peak);
 815                         bus_levels[bus_index].peak_level_dbfs[1] = to_db(peak_history[bus_index][1].current_peak);
 816                         bus_levels[bus_index].historic_peak_dbfs = to_db(
 817                                 max(peak_history[bus_index][0].historic_peak,
 818                                     peak_history[bus_index][1].historic_peak));
 819                         bus_levels[bus_index].gain_staging_db = gain_staging_db[bus_index];
 820                         if (compressor_enabled[bus_index]) {
 821                                 bus_levels[bus_index].compressor_attenuation_db = -to_db(compressor[bus_index]->get_attenuation());
 822                         } else {
 823                                 bus_levels[bus_index].compressor_attenuation_db = 0.0;
 824                         }
 825                 }
 826         }
 827
 828         audio_level_callback(loudness_s, to_db(peak), bus_levels,
 829                 loudness_i, loudness_range_low, loudness_range_high,
 830                 to_db(final_makeup_gain),
 831                 correlation.get_correlation());
 832 }
 833
 834 map<DeviceSpec, DeviceInfo> AudioMixer::get_devices()
 835 {
 836         lock_guard<timed_mutex> lock(audio_mutex);
 837
 838         map<DeviceSpec, DeviceInfo> devices;
 839         for (unsigned card_index = 0; card_index < num_cards; ++card_index) {
 840                 const DeviceSpec spec{ InputSourceType::CAPTURE_CARD, card_index };
 841                 const AudioDevice *device = &video_cards[card_index];
 842                 DeviceInfo info;
 843                 info.display_name = device->display_name;
 844                 info.num_channels = 8;
 845                 devices.insert(make_pair(spec, info));
 846         }
 847         vector<ALSAPool::Device> available_alsa_devices = alsa_pool.get_devices();
 848         for (unsigned card_index = 0; card_index < available_alsa_devices.size(); ++card_index) {
 849                 const DeviceSpec spec{ InputSourceType::ALSA_INPUT, card_index };
 850                 const ALSAPool::Device &device = available_alsa_devices[card_index];
 851                 DeviceInfo info;
 852                 info.display_name = device.display_name();
 853                 info.num_channels = device.num_channels;
 854                 info.alsa_name = device.name;
 855                 info.alsa_info = device.info;
 856                 info.alsa_address = device.address;
 857                 devices.insert(make_pair(spec, info));
 858         }
 859         return devices;
 860 }
 861
 862 void AudioMixer::set_display_name(DeviceSpec device_spec, const string &name)
 863 {
 864         AudioDevice *device = find_audio_device(device_spec);
 865
 866         lock_guard<timed_mutex> lock(audio_mutex);
 867         device->display_name = name;
 868 }
 869
 870 void AudioMixer::serialize_device(DeviceSpec device_spec, DeviceSpecProto *device_spec_proto)
 871 {
 872         lock_guard<timed_mutex> lock(audio_mutex);
 873         switch (device_spec.type) {
 874                 case InputSourceType::SILENCE:
 875                         device_spec_proto->set_type(DeviceSpecProto::SILENCE);
 876                         break;
 877                 case InputSourceType::CAPTURE_CARD:
 878                         device_spec_proto->set_type(DeviceSpecProto::CAPTURE_CARD);
 879                         device_spec_proto->set_index(device_spec.index);
 880                         device_spec_proto->set_display_name(video_cards[device_spec.index].display_name);
 881                         break;
 882                 case InputSourceType::ALSA_INPUT:
 883                         alsa_pool.serialize_device(device_spec.index, device_spec_proto);
 884                         break;
 885         }
 886 }
 887
 888 void AudioMixer::set_simple_input(unsigned card_index)
 889 {
 890         InputMapping new_input_mapping;
 891         InputMapping::Bus input;
 892         input.name = "Main";
 893         input.device.type = InputSourceType::CAPTURE_CARD;
 894         input.device.index = card_index;
 895         input.source_channel[0] = 0;
 896         input.source_channel[1] = 1;
 897
 898         new_input_mapping.buses.push_back(input);
 899
 900         lock_guard<timed_mutex> lock(audio_mutex);
 901         current_mapping_mode = MappingMode::SIMPLE;
 902         set_input_mapping_lock_held(new_input_mapping);
 903         fader_volume_db[0] = 0.0f;
 904 }
 905
 906 unsigned AudioMixer::get_simple_input() const
 907 {
 908         lock_guard<timed_mutex> lock(audio_mutex);
 909         if (input_mapping.buses.size() == 1 &&
 910             input_mapping.buses[0].device.type == InputSourceType::CAPTURE_CARD &&
 911             input_mapping.buses[0].source_channel[0] == 0 &&
 912             input_mapping.buses[0].source_channel[1] == 1) {
 913                 return input_mapping.buses[0].device.index;
 914         } else {
 915                 return numeric_limits<unsigned>::max();
 916         }
 917 }
 918
 919 void AudioMixer::set_input_mapping(const InputMapping &new_input_mapping)
 920 {
 921         lock_guard<timed_mutex> lock(audio_mutex);
 922         set_input_mapping_lock_held(new_input_mapping);
 923         current_mapping_mode = MappingMode::MULTICHANNEL;
 924 }
 925
 926 AudioMixer::MappingMode AudioMixer::get_mapping_mode() const
 927 {
 928         lock_guard<timed_mutex> lock(audio_mutex);
 929         return current_mapping_mode;
 930 }
 931
 932 void AudioMixer::set_input_mapping_lock_held(const InputMapping &new_input_mapping)
 933 {
 934         map<DeviceSpec, set<unsigned>> interesting_channels;
 935         for (const InputMapping::Bus &bus : new_input_mapping.buses) {
 936                 if (bus.device.type == InputSourceType::CAPTURE_CARD ||
 937                     bus.device.type == InputSourceType::ALSA_INPUT) {
 938                         for (unsigned channel = 0; channel < 2; ++channel) {
 939                                 if (bus.source_channel[channel] != -1) {
 940                                         interesting_channels[bus.device].insert(bus.source_channel[channel]);
 941                                 }
 942                         }
 943                 }
 944         }
 945
 946         // Reset resamplers for all cards that don't have the exact same state as before.
 947         for (unsigned card_index = 0; card_index < MAX_VIDEO_CARDS; ++card_index) {
 948                 const DeviceSpec device_spec{InputSourceType::CAPTURE_CARD, card_index};
 949                 AudioDevice *device = find_audio_device(device_spec);
 950                 if (device->interesting_channels != interesting_channels[device_spec]) {
 951                         device->interesting_channels = interesting_channels[device_spec];
 952                         reset_resampler_mutex_held(device_spec);
 953                 }
 954         }
 955         for (unsigned card_index = 0; card_index < MAX_ALSA_CARDS; ++card_index) {
 956                 const DeviceSpec device_spec{InputSourceType::ALSA_INPUT, card_index};
 957                 AudioDevice *device = find_audio_device(device_spec);
 958                 if (interesting_channels[device_spec].empty()) {
 959                         alsa_pool.release_device(card_index);
 960                 } else {
 961                         alsa_pool.hold_device(card_index);
 962                 }
 963                 if (device->interesting_channels != interesting_channels[device_spec]) {
 964                         device->interesting_channels = interesting_channels[device_spec];
 965                         alsa_pool.reset_device(device_spec.index);
 966                         reset_resampler_mutex_held(device_spec);
 967                 }
 968         }
 969
 970         input_mapping = new_input_mapping;
 971 }
 972
 973 InputMapping AudioMixer::get_input_mapping() const
 974 {
 975         lock_guard<timed_mutex> lock(audio_mutex);
 976         return input_mapping;
 977 }
 978
 979 void AudioMixer::reset_peak(unsigned bus_index)
 980 {
 981         lock_guard<timed_mutex> lock(audio_mutex);
 982         for (unsigned channel = 0; channel < 2; ++channel) {
 983                 PeakHistory &history = peak_history[bus_index][channel];
 984                 history.current_level = 0.0f;
 985                 history.historic_peak = 0.0f;
 986                 history.current_peak = 0.0f;
 987                 history.last_peak = 0.0f;
 988                 history.age_seconds = 0.0f;
 989         }
 990 }
 991
 992 AudioMixer *global_audio_mixer = nullptr;  // File-scope AudioMixer pointer; starts out null. NOTE(review): presumably assigned elsewhere during startup -- confirm.