+
+ // Cut away everything under 150 Hz; we don't need it for voice,
+ // and it will reduce headroom and confuse the compressor.
+ // (In particular, any hums at 50 or 60 Hz should be dampened.)
+ locut.render(samples_out.data(), samples_out.size() / 2, 150.0 * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f);
+
+ // Apply a level compressor to get the general level right.
+ // Basically, if it's over about -40 dBFS, we squeeze it down to that level
+ // (or more precisely, near it, since we don't use infinite ratio),
+ // then apply a makeup gain to get it to -12 dBFS. -12 dBFS is, of course,
+ // entirely arbitrary, but from practical tests with speech, it seems to
+ // put us around -23 LUFS, so it's a reasonable starting point for later use.
+ //
+ // TODO: Add the actual compressors/limiters (for taking care of transients)
+ // later in the chain.
+ float threshold = 0.01f; // -40 dBFS.
+ float ratio = 20.0f;
+ float attack_time = 0.1f;
+ float release_time = 10.0f;
+ float makeup_gain = pow(10.0f, 28.0f / 20.0f); // +28 dB takes us to -12 dBFS.
+ level_compressor.process(samples_out.data(), samples_out.size() / 2, threshold, ratio, attack_time, release_time, makeup_gain);
+ last_gain_staging_db = 20.0 * log10(level_compressor.get_attenuation() * makeup_gain);
+
+#if 0
+ printf("level=%f (%+5.2f dBFS) attenuation=%f (%+5.2f dB) end_result=%+5.2f dB\n",
+ level_compressor.get_level(), 20.0 * log10(level_compressor.get_level()),
+ level_compressor.get_attenuation(), 20.0 * log10(level_compressor.get_attenuation()),
+ 20.0 * log10(level_compressor.get_level() * level_compressor.get_attenuation() * makeup_gain));
+#endif
+
+ // Find peak and R128 levels.
+ peak = std::max(peak, find_peak(samples_out));
+ vector<float> left, right;
+ deinterleave_samples(samples_out, &left, &right);
+ float *ptrs[] = { left.data(), right.data() };
+ r128.process(left.size(), ptrs);
+
+ // Actually add the samples to the output.
+ h264_encoder->add_audio(pts_int, move(samples_out));