From 12f9082b06c037b76dc3a653643bdaeaea89f2d2 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 7 Nov 2015 01:41:56 +0100 Subject: [PATCH] Add a 150 Hz lo-cut filter in front of the compressor. --- Makefile | 2 +- filter.cpp | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++ filter.h | 134 ++++++++++++++++++++ mixer.cpp | 7 ++ mixer.h | 3 + 5 files changed, 505 insertions(+), 1 deletion(-) create mode 100644 filter.cpp create mode 100644 filter.h diff --git a/Makefile b/Makefile index 422fe0a..9cd5240 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ OBJS=glwidget.o main.o mainwindow.o vumeter.o lrameter.o vu_common.o OBJS += glwidget.moc.o mainwindow.moc.o vumeter.moc.o lrameter.moc.o # Mixer objects -OBJS += h264encode.o mixer.o bmusb/bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampler.o httpd.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o +OBJS += h264encode.o mixer.o bmusb/bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampler.o httpd.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o filter.o %.o: %.cpp $(CXX) -MMD -MP $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $< diff --git a/filter.cpp b/filter.cpp new file mode 100644 index 0000000..a5cd767 --- /dev/null +++ b/filter.cpp @@ -0,0 +1,360 @@ +#include +#include +#include +#include +#include +#include + +#ifdef __SSE__ +#include +#endif + +#include "filter.h" + +#ifdef __SSE__ + +// For SSE, we set the denormals-as-zero flag instead. +#define early_undenormalise(sample) + +#else // !defined(__SSE__) + +union uint_float { + float f; + unsigned int i; +}; +#define early_undenormalise(sample) { \ + uint_float uf; \ + uf.f = float(sample); \ + if ((uf.i&0x60000000)==0) sample=0.0f; \ +} + +#endif // !_defined(__SSE__) + +Filter::Filter() +{ + omega = M_PI; + resonance = 0.01f; + + init(FILTER_NONE, 1); + update(); +} + +void Filter::update() +{ + /* + uses coefficients grabbed from + RBJs audio eq cookbook: + http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt + */ + + float sn, cs; + float cutoff_freq = omega; + cutoff_freq = std::min(cutoff_freq, (float)M_PI); + cutoff_freq = std::max(cutoff_freq, 0.001f); + calcSinCos(cutoff_freq, &sn, &cs); + if (resonance <= 0) resonance = 0.001f; + +#ifdef __GNUC__ + // Faster version of real_resonance = resonance ^ (1 / order). + // pow(), at least on current GCC, is pretty slow. + float real_resonance = resonance; + switch (filter_order) { + case 0: + case 1: + break; + case 4: + real_resonance = sqrt(real_resonance); + // Fall through. + case 2: + real_resonance = sqrt(real_resonance); + break; + case 3: + real_resonance = cbrt(real_resonance); + break; + default: + assert(false); + } +#else + float real_resonance = pow(resonance, 1.0f / filter_order); +#endif + + float alpha = float(sn / (2 * real_resonance)); + float a0 = 1 + alpha; + a1 = -2 * cs; + a2 = 1 - alpha; + + switch (filtertype) { + case FILTER_NONE: + a0 = b0 = 1.0f; + a1 = a2 = b1 = b2 = 0.0; //identity filter + break; + + case FILTER_LPF: + b0 = (1 - cs) * 0.5f; + b1 = 1 - cs; + b2 = b0; + // a1 = -2*cs; + // a2 = 1 - alpha; + break; + + case FILTER_HPF: + b0 = (1 + cs) * 0.5f; + b1 = -(1 + cs); + b2 = b0; + // a1 = -2*cs; + // a2 = 1 - alpha; + break; + + case FILTER_BPF: + b0 = alpha; + b1 = 0.0f; + b2 = -alpha; + // a1 = -2*cs; + // a2 = 1 - alpha; + break; + + case FILTER_NOTCH: + b0 = 1.0f; + b1 = -2*cs; + b2 = 1.0f; + // a1 = -2*cs; + // a2 = 1 - alpha; + break; + + case FILTER_APF: + b0 = 1 - alpha; + b1 = -2*cs; + b2 = 1.0f; + // a1 = -2*cs; + // a2 = 1 - alpha; + break; + + default: + //unknown filter type + assert(false); + break; + } + + const float invA0 = 1.0f / a0; + b0 *= invA0; + b1 *= invA0; + b2 *= invA0; + a1 *= invA0; + a2 *= invA0; +} + +#ifndef NDEBUG +void Filter::debug() +{ + // Feed this to gnuplot to get a graph of the frequency response. + const float Fs2 = OUTPUT_FREQUENCY * 0.5f; + printf("set xrange [2:%f]; ", Fs2); + printf("set yrange [-80:20]; "); + printf("set log x; "); + printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2); + printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); "); + printf("db(x) = 20*log10(x); "); + printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2); +} +#endif + +void Filter::init(FilterType type, int order) +{ + filtertype = type; + filter_order = order; + if (filtertype == FILTER_NONE) filter_order = 0; + if (filter_order == 0) filtertype = FILTER_NONE; + + //reset feedback buffer + for (unsigned i = 0; i < filter_order; i++) { + feedback[i].d0 = feedback[i].d1 = 0.0f; + } +} + +#ifdef __SSE__ +void Filter::render_chunk(float *inout_buf, unsigned int n_samples) +#else +void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride) +#endif +{ +#ifdef __SSE__ + const unsigned stride = 1; + unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE(); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); +#endif + assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4 + + // Apply the filter FILTER_ORDER times. + for (unsigned j = 0; j < filter_order; j++) { + float d0 = feedback[j].d0; + float d1 = feedback[j].d1; + float *inout_ptr = inout_buf; + + // Render n_samples mono samples. Unrolling manually by a + // factor four seemingly helps a lot, perhaps because it + // lets the CPU overlap arithmetic and memory operations + // better, or perhaps simply because the loop overhead is + // high. + for (unsigned i = n_samples >> 2; i; i--) { + float in, out; + + in = *inout_ptr; + out = b0*in + d0; + *inout_ptr = out; + d0 = b1*in - a1*out + d1; + d1 = b2*in - a2*out; + inout_ptr += stride; + + in = *inout_ptr; + out = b0*in + d0; + *inout_ptr = out; + d0 = b1*in - a1*out + d1; + d1 = b2*in - a2*out; + inout_ptr += stride; + + in = *inout_ptr; + out = b0*in + d0; + *inout_ptr = out; + d0 = b1*in - a1*out + d1; + d1 = b2*in - a2*out; + inout_ptr += stride; + + in = *inout_ptr; + out = b0*in + d0; + *inout_ptr = out; + d0 = b1*in - a1*out + d1; + d1 = b2*in - a2*out; + inout_ptr += stride; + } + early_undenormalise(d0); //do denormalization step + early_undenormalise(d1); + feedback[j].d0 = d0; + feedback[j].d1 = d1; + } + +#ifdef __SSE__ + _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode); +#endif +} + +void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance) +{ + //render buf_size mono samples +#ifdef __SSE__ + assert(buf_size % 4 == 0); +#endif + if (filter_order == 0) + return; + + this->set_linear_cutoff(cutoff); + this->set_resonance(resonance); + this->update(); + this->render_chunk(inout_buf, buf_size); +} + +void StereoFilter::init(FilterType type, int new_order) +{ +#ifdef __SSE__ + parm_filter.init(type, new_order); + memset(feedback, 0, sizeof(feedback)); +#else + for (unsigned i = 0; i < 2; ++i) { + filters[i].init(type, 0, new_order, 0); + } +#endif +} + +void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance) +{ +#ifdef __SSE__ + if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0) + return; + + unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE(); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + parm_filter.set_linear_cutoff(cutoff); + parm_filter.set_resonance(resonance); + parm_filter.update(); + + __m128 b0 = _mm_set1_ps(parm_filter.b0); + __m128 b1 = _mm_set1_ps(parm_filter.b1); + __m128 b2 = _mm_set1_ps(parm_filter.b2); + __m128 a1 = _mm_set1_ps(parm_filter.a1); + __m128 a2 = _mm_set1_ps(parm_filter.a2); + + // Apply the filter FILTER_ORDER times. + for (unsigned j = 0; j < parm_filter.filter_order; j++) { + __m128 d0 = feedback[j].d0; + __m128 d1 = feedback[j].d1; + __m64 *inout_ptr = (__m64 *)inout_left_ptr; + + __m128 in = _mm_set1_ps(0.0f), out; + for (unsigned i = n_samples; i; i--) { + in = _mm_loadl_pi(in, inout_ptr); + out = _mm_add_ps(_mm_mul_ps(b0, in), d0); + _mm_storel_pi(inout_ptr, out); + d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1); + d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out)); + ++inout_ptr; + } + feedback[j].d0 = d0; + feedback[j].d1 = d1; + } + + _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode); +#else + if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0) + return; + + for (unsigned i = 0; i < 2; ++i) { + filters[i].set_linear_cutoff(cutoff); + filters[i].set_resonance(resonance); + filters[i].update(); + filters[i].render_chunk(inout_left_ptr, n_samples, 2); + + ++inout_left_ptr; + } +#endif +} + +/* + + Find the transfer function for an IIR biquad. This is relatively basic signal + processing, but for completeness, here's the rationale for the function: + + The basic system of an IIR biquad looks like this, for input x[n], output y[n] + and constant filter coefficients [ab][0-2]: + + a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n] + + Taking the discrete Fourier transform (DFT) of both sides (denoting by convention + DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields, + due to the linearity and shift properties of the DFT: + + a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w] + + Simple factorization and reorganization yields + + Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0) + + and Y[w] / X[w] is by definition the filter's _transfer function_ + (customarily denoted by H(w)), ie. the complex factor it applies to the + frequency component w. The absolute value of the transfer function is + the frequency response, ie. how much frequency w is boosted or weakened. + + (This derivation usually goes via the Z-transform and not the DFT, but the + idea is exactly the same; the Z-transform is just a bit more general.) + + Sending a signal through first one filter and then through another one + will naturally be equivalent to a filter with the transfer function equal + to the pointwise multiplication of the two filters, so for N-order filters + we need to raise the answer to the Nth power. + +*/ +std::complex Filter::evaluate_transfer_function(float omega) +{ + std::complex z = exp(std::complex(0.0f, omega)); + std::complex z2 = z * z; + return std::pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order); +} diff --git a/filter.h b/filter.h new file mode 100644 index 0000000..5e455d2 --- /dev/null +++ b/filter.h @@ -0,0 +1,134 @@ +// Filter class: +// a cascaded biquad IIR filter +// +// Special cases for type=LPF/BPF/HPF: +// +// Butterworth filter: order=1, resonance=1/sqrt(2) +// Linkwitz-Riley filter: order=2, resonance=1/2 + +#ifndef _FILTER_H +#define _FILTER_H 1 + +#define _USE_MATH_DEFINES +#include +#include +#include + +#ifdef __SSE__ +#include +#endif + +#include "defs.h" + +namespace std { +template struct complex; +} // namespace std + +enum FilterType +{ + FILTER_NONE = 0, + FILTER_LPF, + FILTER_HPF, + FILTER_BPF, + FILTER_NOTCH, + FILTER_APF, +}; + +#define FILTER_MAX_ORDER 4 + +class Filter +{ + friend class StereoFilter; + friend class SplittingStereoFilter; +public: + Filter(); + + void init(FilterType type, int new_order); + + void update(); //update coefficients +#ifndef NDEBUG + void debug(); +#endif + std::complex evaluate_transfer_function(float omega); + + FilterType get_type() { return filtertype; } + unsigned get_order() { return filter_order; } + + // cutoff is taken to be in the [0..pi> (see set_linear_cutoff, below). + void render(float *inout_array, unsigned int buf_size, float cutoff, float resonance); + + // Set cutoff, from [0..pi> (where pi is the Nyquist frequency). + // Overridden by render() if you use that. + void set_linear_cutoff(float new_omega) + { + omega = new_omega; + } + + void set_resonance(float new_resonance) + { + resonance = new_resonance; + } + +#ifdef __SSE__ + // We don't need the stride argument for SSE, as StereoFilter + // has its own SSE implementations. + void render_chunk(float *inout_buf, unsigned nSamples); +#else + void render_chunk(float *inout_buf, unsigned nSamples, unsigned stride = 1); +#endif + + FilterType filtertype; +private: + float omega; //which is 2*Pi*frequency /SAMPLE_RATE + float resonance; + +public: + unsigned filter_order; +private: + float b0, b1, b2, a1, a2; //filter coefs + + struct FeedbackBuffer { + float d0,d1; //feedback buffers + } feedback[FILTER_MAX_ORDER]; + + void calcSinCos(float omega, float *sinVal, float *cosVal) + { + *sinVal = (float)sin(omega); + *cosVal = (float)cos(omega); + } +}; + + +class StereoFilter +{ +public: + void init(FilterType type, int new_order); + + void render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance); +#ifndef NDEBUG +#ifdef __SSE__ + void debug() { parm_filter.debug(); } +#else + void debug() { filters[0].debug(); } +#endif +#endif +#ifdef __SSE__ + FilterType get_type() { return parm_filter.get_type(); } +#else + FilterType get_type() { return filters[0].get_type(); } +#endif + +private: +#ifdef __SSE__ + // We only use the filter to calculate coefficients; we don't actually + // use its feedbacks. + Filter parm_filter; + struct SIMDFeedbackBuffer { + __m128 d0, d1; + } feedback[FILTER_MAX_ORDER]; +#else + Filter filters[2]; +#endif +}; + +#endif // !defined(_FILTER_H) diff --git a/mixer.cpp b/mixer.cpp index d903592..03cf99d 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -149,6 +149,8 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) r128.init(2, OUTPUT_FREQUENCY); r128.integr_start(); + + locut.init(FILTER_HPF, 2); } Mixer::~Mixer() @@ -538,6 +540,11 @@ void Mixer::process_audio_one_frame() } } + // Cut away everything under 150 Hz; we don't need it for voice, + // and it will reduce headroom and confuse the compressor. + // (In particular, any hums at 50 or 60 Hz should be dampened.) + locut.render(samples_out.data(), samples_out.size() / 2, 150.0 * 2.0 * M_PI / OUTPUT_FREQUENCY, 0.5f); + // Apply a level compressor to get the general level right. // Basically, if it's over about -40 dBFS, we squeeze it down to that level // (or more precisely, near it, since we don't use infinite ratio), diff --git a/mixer.h b/mixer.h index cdc6f1c..c312ee3 100644 --- a/mixer.h +++ b/mixer.h @@ -30,6 +30,7 @@ #include "theme.h" #include "timebase.h" #include "stereocompressor.h" +#include "filter.h" class H264Encoder; class QSurface; @@ -205,6 +206,8 @@ private: // TODO: Implement oversampled peak detection. float peak = 0.0f; + StereoFilter locut; // Cutoff 150 Hz, 24 dB/oct. + // First compressor; takes us up to about -12 dBFS. StereoCompressor level_compressor; }; -- 2.39.2