--- /dev/null
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <complex>
+#include <algorithm>
+
+#ifdef __SSE__
+#include <mmintrin.h>
+#endif
+
+#include "filter.h"
+
+#ifdef __SSE__
+
+// For SSE, we set the denormals-as-zero flag instead.
+#define early_undenormalise(sample)
+
+#else // !defined(__SSE__)
+
+union uint_float {
+ float f;
+ unsigned int i;
+};
+#define early_undenormalise(sample) { \
+ uint_float uf; \
+ uf.f = float(sample); \
+ if ((uf.i&0x60000000)==0) sample=0.0f; \
+}
+
+#endif // !_defined(__SSE__)
+
+Filter::Filter()
+{
+ omega = M_PI;
+ resonance = 0.01f;
+
+ init(FILTER_NONE, 1);
+ update();
+}
+
+void Filter::update()
+{
+ /*
+ uses coefficients grabbed from
+ RBJs audio eq cookbook:
+ http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
+ */
+
+ float sn, cs;
+ float cutoff_freq = omega;
+ cutoff_freq = std::min(cutoff_freq, (float)M_PI);
+ cutoff_freq = std::max(cutoff_freq, 0.001f);
+ calcSinCos(cutoff_freq, &sn, &cs);
+ if (resonance <= 0) resonance = 0.001f;
+
+#ifdef __GNUC__
+ // Faster version of real_resonance = resonance ^ (1 / order).
+ // pow(), at least on current GCC, is pretty slow.
+ float real_resonance = resonance;
+ switch (filter_order) {
+ case 0:
+ case 1:
+ break;
+ case 4:
+ real_resonance = sqrt(real_resonance);
+ // Fall through.
+ case 2:
+ real_resonance = sqrt(real_resonance);
+ break;
+ case 3:
+ real_resonance = cbrt(real_resonance);
+ break;
+ default:
+ assert(false);
+ }
+#else
+ float real_resonance = pow(resonance, 1.0f / filter_order);
+#endif
+
+ float alpha = float(sn / (2 * real_resonance));
+ float a0 = 1 + alpha;
+ a1 = -2 * cs;
+ a2 = 1 - alpha;
+
+ switch (filtertype) {
+ case FILTER_NONE:
+ a0 = b0 = 1.0f;
+ a1 = a2 = b1 = b2 = 0.0; //identity filter
+ break;
+
+ case FILTER_LPF:
+ b0 = (1 - cs) * 0.5f;
+ b1 = 1 - cs;
+ b2 = b0;
+ // a1 = -2*cs;
+ // a2 = 1 - alpha;
+ break;
+
+ case FILTER_HPF:
+ b0 = (1 + cs) * 0.5f;
+ b1 = -(1 + cs);
+ b2 = b0;
+ // a1 = -2*cs;
+ // a2 = 1 - alpha;
+ break;
+
+ case FILTER_BPF:
+ b0 = alpha;
+ b1 = 0.0f;
+ b2 = -alpha;
+ // a1 = -2*cs;
+ // a2 = 1 - alpha;
+ break;
+
+ case FILTER_NOTCH:
+ b0 = 1.0f;
+ b1 = -2*cs;
+ b2 = 1.0f;
+ // a1 = -2*cs;
+ // a2 = 1 - alpha;
+ break;
+
+ case FILTER_APF:
+ b0 = 1 - alpha;
+ b1 = -2*cs;
+ b2 = 1.0f;
+ // a1 = -2*cs;
+ // a2 = 1 - alpha;
+ break;
+
+ default:
+ //unknown filter type
+ assert(false);
+ break;
+ }
+
+ const float invA0 = 1.0f / a0;
+ b0 *= invA0;
+ b1 *= invA0;
+ b2 *= invA0;
+ a1 *= invA0;
+ a2 *= invA0;
+}
+
+#ifndef NDEBUG
+void Filter::debug()
+{
+ // Feed this to gnuplot to get a graph of the frequency response.
+ const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
+ printf("set xrange [2:%f]; ", Fs2);
+ printf("set yrange [-80:20]; ");
+ printf("set log x; ");
+ printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
+ printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
+ printf("db(x) = 20*log10(x); ");
+ printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
+}
+#endif
+
+void Filter::init(FilterType type, int order)
+{
+ filtertype = type;
+ filter_order = order;
+ if (filtertype == FILTER_NONE) filter_order = 0;
+ if (filter_order == 0) filtertype = FILTER_NONE;
+
+ //reset feedback buffer
+ for (unsigned i = 0; i < filter_order; i++) {
+ feedback[i].d0 = feedback[i].d1 = 0.0f;
+ }
+}
+
+#ifdef __SSE__
+void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
+#else
+void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
+#endif
+{
+#ifdef __SSE__
+ const unsigned stride = 1;
+ unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
+ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+#endif
+ assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
+
+ // Apply the filter FILTER_ORDER times.
+ for (unsigned j = 0; j < filter_order; j++) {
+ float d0 = feedback[j].d0;
+ float d1 = feedback[j].d1;
+ float *inout_ptr = inout_buf;
+
+ // Render n_samples mono samples. Unrolling manually by a
+ // factor four seemingly helps a lot, perhaps because it
+ // lets the CPU overlap arithmetic and memory operations
+ // better, or perhaps simply because the loop overhead is
+ // high.
+ for (unsigned i = n_samples >> 2; i; i--) {
+ float in, out;
+
+ in = *inout_ptr;
+ out = b0*in + d0;
+ *inout_ptr = out;
+ d0 = b1*in - a1*out + d1;
+ d1 = b2*in - a2*out;
+ inout_ptr += stride;
+
+ in = *inout_ptr;
+ out = b0*in + d0;
+ *inout_ptr = out;
+ d0 = b1*in - a1*out + d1;
+ d1 = b2*in - a2*out;
+ inout_ptr += stride;
+
+ in = *inout_ptr;
+ out = b0*in + d0;
+ *inout_ptr = out;
+ d0 = b1*in - a1*out + d1;
+ d1 = b2*in - a2*out;
+ inout_ptr += stride;
+
+ in = *inout_ptr;
+ out = b0*in + d0;
+ *inout_ptr = out;
+ d0 = b1*in - a1*out + d1;
+ d1 = b2*in - a2*out;
+ inout_ptr += stride;
+ }
+ early_undenormalise(d0); //do denormalization step
+ early_undenormalise(d1);
+ feedback[j].d0 = d0;
+ feedback[j].d1 = d1;
+ }
+
+#ifdef __SSE__
+ _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
+#endif
+}
+
+void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
+{
+ //render buf_size mono samples
+#ifdef __SSE__
+ assert(buf_size % 4 == 0);
+#endif
+ if (filter_order == 0)
+ return;
+
+ this->set_linear_cutoff(cutoff);
+ this->set_resonance(resonance);
+ this->update();
+ this->render_chunk(inout_buf, buf_size);
+}
+
+void StereoFilter::init(FilterType type, int new_order)
+{
+#ifdef __SSE__
+ parm_filter.init(type, new_order);
+ memset(feedback, 0, sizeof(feedback));
+#else
+ for (unsigned i = 0; i < 2; ++i) {
+ filters[i].init(type, 0, new_order, 0);
+ }
+#endif
+}
+
+void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance)
+{
+#ifdef __SSE__
+ if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
+ return;
+
+ unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
+ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+
+ parm_filter.set_linear_cutoff(cutoff);
+ parm_filter.set_resonance(resonance);
+ parm_filter.update();
+
+ __m128 b0 = _mm_set1_ps(parm_filter.b0);
+ __m128 b1 = _mm_set1_ps(parm_filter.b1);
+ __m128 b2 = _mm_set1_ps(parm_filter.b2);
+ __m128 a1 = _mm_set1_ps(parm_filter.a1);
+ __m128 a2 = _mm_set1_ps(parm_filter.a2);
+
+ // Apply the filter FILTER_ORDER times.
+ for (unsigned j = 0; j < parm_filter.filter_order; j++) {
+ __m128 d0 = feedback[j].d0;
+ __m128 d1 = feedback[j].d1;
+ __m64 *inout_ptr = (__m64 *)inout_left_ptr;
+
+ __m128 in = _mm_set1_ps(0.0f), out;
+ for (unsigned i = n_samples; i; i--) {
+ in = _mm_loadl_pi(in, inout_ptr);
+ out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
+ _mm_storel_pi(inout_ptr, out);
+ d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
+ d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
+ ++inout_ptr;
+ }
+ feedback[j].d0 = d0;
+ feedback[j].d1 = d1;
+ }
+
+ _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
+#else
+ if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
+ return;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ filters[i].set_linear_cutoff(cutoff);
+ filters[i].set_resonance(resonance);
+ filters[i].update();
+ filters[i].render_chunk(inout_left_ptr, n_samples, 2);
+
+ ++inout_left_ptr;
+ }
+#endif
+}
+
+/*
+
+ Find the transfer function for an IIR biquad. This is relatively basic signal
+ processing, but for completeness, here's the rationale for the function:
+
+ The basic system of an IIR biquad looks like this, for input x[n], output y[n]
+ and constant filter coefficients [ab][0-2]:
+
+ a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
+
+ Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
+ DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
+ due to the linearity and shift properties of the DFT:
+
+ a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
+
+ Simple factorization and reorganization yields
+
+ Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
+
+ and Y[w] / X[w] is by definition the filter's _transfer function_
+ (customarily denoted by H(w)), ie. the complex factor it applies to the
+ frequency component w. The absolute value of the transfer function is
+ the frequency response, ie. how much frequency w is boosted or weakened.
+
+ (This derivation usually goes via the Z-transform and not the DFT, but the
+ idea is exactly the same; the Z-transform is just a bit more general.)
+
+ Sending a signal through first one filter and then through another one
+ will naturally be equivalent to a filter with the transfer function equal
+ to the pointwise multiplication of the two filters, so for N-order filters
+ we need to raise the answer to the Nth power.
+
+*/
+std::complex<double> Filter::evaluate_transfer_function(float omega)
+{
+ std::complex<float> z = exp(std::complex<float>(0.0f, omega));
+ std::complex<float> z2 = z * z;
+ return std::pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
+}
--- /dev/null
+// Filter class:
+// a cascaded biquad IIR filter
+//
+// Special cases for type=LPF/BPF/HPF:
+//
+// Butterworth filter: order=1, resonance=1/sqrt(2)
+// Linkwitz-Riley filter: order=2, resonance=1/2
+
+#ifndef _FILTER_H
+#define _FILTER_H 1
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <complex>
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#include "defs.h"
+
+namespace std {
+template <typename _Tp> struct complex;
+} // namespace std
+
+enum FilterType
+{
+ FILTER_NONE = 0,
+ FILTER_LPF,
+ FILTER_HPF,
+ FILTER_BPF,
+ FILTER_NOTCH,
+ FILTER_APF,
+};
+
+#define FILTER_MAX_ORDER 4
+
+class Filter
+{
+ friend class StereoFilter;
+ friend class SplittingStereoFilter;
+public:
+ Filter();
+
+ void init(FilterType type, int new_order);
+
+ void update(); //update coefficients
+#ifndef NDEBUG
+ void debug();
+#endif
+ std::complex<double> evaluate_transfer_function(float omega);
+
+ FilterType get_type() { return filtertype; }
+ unsigned get_order() { return filter_order; }
+
+ // cutoff is taken to be in the [0..pi> (see set_linear_cutoff, below).
+ void render(float *inout_array, unsigned int buf_size, float cutoff, float resonance);
+
+ // Set cutoff, from [0..pi> (where pi is the Nyquist frequency).
+ // Overridden by render() if you use that.
+ void set_linear_cutoff(float new_omega)
+ {
+ omega = new_omega;
+ }
+
+ void set_resonance(float new_resonance)
+ {
+ resonance = new_resonance;
+ }
+
+#ifdef __SSE__
+ // We don't need the stride argument for SSE, as StereoFilter
+ // has its own SSE implementations.
+ void render_chunk(float *inout_buf, unsigned nSamples);
+#else
+ void render_chunk(float *inout_buf, unsigned nSamples, unsigned stride = 1);
+#endif
+
+ FilterType filtertype;
+private:
+ float omega; //which is 2*Pi*frequency /SAMPLE_RATE
+ float resonance;
+
+public:
+ unsigned filter_order;
+private:
+ float b0, b1, b2, a1, a2; //filter coefs
+
+ struct FeedbackBuffer {
+ float d0,d1; //feedback buffers
+ } feedback[FILTER_MAX_ORDER];
+
+ void calcSinCos(float omega, float *sinVal, float *cosVal)
+ {
+ *sinVal = (float)sin(omega);
+ *cosVal = (float)cos(omega);
+ }
+};
+
+
+class StereoFilter
+{
+public:
+ void init(FilterType type, int new_order);
+
+ void render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance);
+#ifndef NDEBUG
+#ifdef __SSE__
+ void debug() { parm_filter.debug(); }
+#else
+ void debug() { filters[0].debug(); }
+#endif
+#endif
+#ifdef __SSE__
+ FilterType get_type() { return parm_filter.get_type(); }
+#else
+ FilterType get_type() { return filters[0].get_type(); }
+#endif
+
+private:
+#ifdef __SSE__
+ // We only use the filter to calculate coefficients; we don't actually
+ // use its feedbacks.
+ Filter parm_filter;
+ struct SIMDFeedbackBuffer {
+ __m128 d0, d1;
+ } feedback[FILTER_MAX_ORDER];
+#else
+ Filter filters[2];
+#endif
+};
+
+#endif // !defined(_FILTER_H)