18 // For SSE, we set the denormals-as-zero flag instead.
19 #define early_undenormalise(sample)
21 #else // !defined(__SSE__)
27 #define early_undenormalise(sample) { \
29 uf.f = float(sample); \
30 if ((uf.i&0x60000000)==0) sample=0.0f; \
33 #endif // !_defined(__SSE__)
48 uses coefficients grabbed from
49 RBJs audio eq cookbook:
50 http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
54 float cutoff_freq = omega;
55 cutoff_freq = min(cutoff_freq, (float)M_PI);
56 cutoff_freq = max(cutoff_freq, 0.001f);
57 calcSinCos(cutoff_freq, &sn, &cs);
58 if (resonance <= 0) resonance = 0.001f;
61 // Faster version of real_resonance = resonance ^ (1 / order).
62 // pow(), at least on current GCC, is pretty slow.
63 float real_resonance = resonance;
64 switch (filter_order) {
69 real_resonance = sqrt(real_resonance);
72 real_resonance = sqrt(real_resonance);
75 real_resonance = cbrt(real_resonance);
81 float real_resonance = pow(resonance, 1.0f / filter_order);
84 float alpha = float(sn / (2 * real_resonance));
92 a1 = a2 = b1 = b2 = 0.0; //identity filter
104 b0 = (1 + cs) * 0.5f;
135 case FILTER_PEAKING_EQ:
144 case FILTER_LOW_SHELF:
145 b0 = A * ((A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha);
146 b1 = 2 * A * ((A - 1) - (A + 1)*cs );
147 b2 = A * ((A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha);
148 a0 = (A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha ;
149 a1 = -2 * ((A - 1) + (A + 1)*cs );
150 a2 = (A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha ;
153 case FILTER_HIGH_SHELF:
154 b0 = A * ((A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha);
155 b1 = -2 * A * ((A - 1) + (A + 1)*cs );
156 b2 = A * ((A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha);
157 a0 = (A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha ;
158 a1 = 2 * ((A - 1) - (A + 1)*cs );
159 a2 = (A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha ;
163 //unknown filter type
168 const float invA0 = 1.0f / a0;
179 // Feed this to gnuplot to get a graph of the frequency response.
180 const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
181 printf("set xrange [2:%f]; ", Fs2);
182 printf("set yrange [-80:20]; ");
183 printf("set log x; ");
184 printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
185 printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
186 printf("db(x) = 20*log10(x); ");
187 printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
191 void Filter::init(FilterType type, int order)
194 filter_order = order;
195 if (filtertype == FILTER_NONE) filter_order = 0;
196 if (filter_order == 0) filtertype = FILTER_NONE;
198 //reset feedback buffer
199 for (unsigned i = 0; i < filter_order; i++) {
200 feedback[i].d0 = feedback[i].d1 = 0.0f;
205 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
207 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
211 const unsigned stride = 1;
212 unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
213 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
215 assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
217 // Apply the filter FILTER_ORDER times.
218 for (unsigned j = 0; j < filter_order; j++) {
219 float d0 = feedback[j].d0;
220 float d1 = feedback[j].d1;
221 float *inout_ptr = inout_buf;
223 // Render n_samples mono samples. Unrolling manually by a
224 // factor four seemingly helps a lot, perhaps because it
225 // lets the CPU overlap arithmetic and memory operations
226 // better, or perhaps simply because the loop overhead is
228 for (unsigned i = n_samples >> 2; i; i--) {
234 d0 = b1*in - a1*out + d1;
241 d0 = b1*in - a1*out + d1;
248 d0 = b1*in - a1*out + d1;
255 d0 = b1*in - a1*out + d1;
259 early_undenormalise(d0); //do denormalization step
260 early_undenormalise(d1);
266 _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
270 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
272 //render buf_size mono samples
274 assert(buf_size % 4 == 0);
276 if (filter_order == 0)
279 this->set_linear_cutoff(cutoff);
280 this->set_resonance(resonance);
282 this->render_chunk(inout_buf, buf_size);
285 void StereoFilter::init(FilterType type, int new_order)
288 parm_filter.init(type, new_order);
289 memset(feedback, 0, sizeof(feedback));
291 for (unsigned i = 0; i < 2; ++i) {
292 filters[i].init(type, new_order);
297 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance, float dbgain_normalized)
300 if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
303 unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
304 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
306 parm_filter.set_linear_cutoff(cutoff);
307 parm_filter.set_resonance(resonance);
308 parm_filter.set_dbgain_normalized(dbgain_normalized);
309 parm_filter.update();
311 __m128 b0 = _mm_set1_ps(parm_filter.b0);
312 __m128 b1 = _mm_set1_ps(parm_filter.b1);
313 __m128 b2 = _mm_set1_ps(parm_filter.b2);
314 __m128 a1 = _mm_set1_ps(parm_filter.a1);
315 __m128 a2 = _mm_set1_ps(parm_filter.a2);
317 // Apply the filter FILTER_ORDER times.
318 for (unsigned j = 0; j < parm_filter.filter_order; j++) {
319 __m128 d0 = feedback[j].d0;
320 __m128 d1 = feedback[j].d1;
321 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
323 __m128 in = _mm_set1_ps(0.0f), out;
324 for (unsigned i = n_samples; i; i--) {
325 in = _mm_loadl_pi(in, inout_ptr);
326 out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
327 _mm_storel_pi(inout_ptr, out);
328 d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
329 d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
336 _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
338 if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
341 for (unsigned i = 0; i < 2; ++i) {
342 filters[i].set_linear_cutoff(cutoff);
343 filters[i].set_resonance(resonance);
345 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
354 Find the transfer function for an IIR biquad. This is relatively basic signal
355 processing, but for completeness, here's the rationale for the function:
357 The basic system of an IIR biquad looks like this, for input x[n], output y[n]
358 and constant filter coefficients [ab][0-2]:
360 a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
362 Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
363 DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
364 due to the linearity and shift properties of the DFT:
366 a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
368 Simple factorization and reorganization yields
370 Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
372 and Y[w] / X[w] is by definition the filter's _transfer function_
373 (customarily denoted by H(w)), ie. the complex factor it applies to the
374 frequency component w. The absolute value of the transfer function is
375 the frequency response, ie. how much frequency w is boosted or weakened.
377 (This derivation usually goes via the Z-transform and not the DFT, but the
378 idea is exactly the same; the Z-transform is just a bit more general.)
380 Sending a signal through first one filter and then through another one
381 will naturally be equivalent to a filter with the transfer function equal
382 to the pointwise multiplication of the two filters, so for N-order filters
383 we need to raise the answer to the Nth power.
386 complex<double> Filter::evaluate_transfer_function(float omega)
388 complex<float> z = exp(complex<float>(0.0f, omega));
389 complex<float> z2 = z * z;
390 return pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);