git.sesse.net Git - nageru/blob - filter.cpp

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <string.h>
   5 #include <algorithm>
   6 #include <complex>
   7
   8 #include "defs.h"
   9
  10 #ifdef __SSE__
  11 #include <mmintrin.h>
  12 #endif
  13
  14 #include "filter.h"
  15
  16 using namespace std;
  17
  18 #ifdef __SSE__
  19
  20 // For SSE, we set the denormals-as-zero flag instead.
  21 #define early_undenormalise(sample)
  22
  23 #else  // !defined(__SSE__)
  24
  25 union uint_float {
  26         float f;
  27         unsigned int i;
  28 };
  29 #define early_undenormalise(sample) { \
  30          uint_float uf; \
  31          uf.f = float(sample); \
  32          if ((uf.i&0x60000000)==0) sample=0.0f; \
  33 }
  34
  35 #endif  // !_defined(__SSE__)
  36
  37 Filter::Filter()
  38 {
  39         omega = M_PI;
  40         resonance = 0.01f;
  41         A = 1.0f;
  42
  43         init(FILTER_NONE, 1);
  44         update();
  45 }
  46
  47 void Filter::update()
  48 {
  49         /*
  50           uses coefficients grabbed from
  51           RBJs audio eq cookbook:
  52           http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  53         */
  54
  55         float sn, cs;
  56         float cutoff_freq = omega;
  57         cutoff_freq = min(cutoff_freq, (float)M_PI);
  58         cutoff_freq = max(cutoff_freq, 0.001f);
  59         calcSinCos(cutoff_freq, &sn, &cs);
  60         if (resonance <= 0) resonance = 0.001f;
  61
  62 #ifdef __GNUC__
  63         // Faster version of real_resonance = resonance ^ (1 / order).
  64         // pow(), at least on current GCC, is pretty slow.
  65         float real_resonance = resonance;
  66         switch (filter_order) {
  67         case 0:
  68         case 1:
  69                 break;
  70         case 4:
  71                 real_resonance = sqrt(real_resonance);
  72                 // Fall through.
  73         case 2:
  74                 real_resonance = sqrt(real_resonance);
  75                 break;
  76         case 3:
  77                 real_resonance = cbrt(real_resonance);
  78                 break;
  79         default:
  80                 assert(false);
  81         }
  82 #else
  83         float real_resonance = pow(resonance, 1.0f / filter_order);
  84 #endif
  85
  86         float alpha = float(sn / (2 * real_resonance));
  87         float a0 = 1 + alpha;
  88         a1 = -2 * cs;
  89         a2 = 1 - alpha;
  90
  91         switch (filtertype) {
  92         case FILTER_NONE:
  93                 a0 = b0 = 1.0f;
  94                 a1 = a2 = b1 = b2 = 0.0; //identity filter
  95                 break;
  96
  97         case FILTER_LPF:
  98                 b0 = (1 - cs) * 0.5f;
  99                 b1 = 1 - cs;
 100                 b2 = b0;
 101                 // a1 = -2*cs;
 102                 // a2 = 1 - alpha;
 103                 break;
 104
 105         case FILTER_HPF:
 106                 b0 = (1 + cs) * 0.5f;
 107                 b1 = -(1 + cs);
 108                 b2 = b0;
 109                 // a1 = -2*cs;
 110                 // a2 = 1 - alpha;
 111                 break;
 112
 113         case FILTER_BPF:
 114                 b0 = alpha;
 115                 b1 = 0.0f;
 116                 b2 = -alpha;
 117                 // a1 = -2*cs;
 118                 // a2 = 1 - alpha;
 119                 break;
 120
 121         case FILTER_NOTCH:
 122                 b0 = 1.0f;
 123                 b1 = -2*cs;
 124                 b2 = 1.0f;
 125                 // a1 = -2*cs;
 126                 // a2 = 1 - alpha;
 127                 break;
 128
 129         case FILTER_APF:
 130                 b0 = 1 - alpha;
 131                 b1 = -2*cs;
 132                 b2 = 1.0f;
 133                 // a1 = -2*cs;
 134                 // a2 = 1 - alpha;
 135                 break;
 136
 137         case FILTER_PEAKING_EQ:
 138                 b0 = 1 + alpha * A;
 139                 b1 = -2*cs;
 140                 b2 = 1 - alpha * A;
 141                 a0 = 1 + alpha / A;
 142                 // a1 = -2*cs;
 143                 a2 = 1 - alpha / A;
 144                 break;
 145
 146         case FILTER_LOW_SHELF:
 147                 b0 =      A * ((A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha);
 148                 b1 =  2 * A * ((A - 1) - (A + 1)*cs                      );
 149                 b2 =      A * ((A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha);
 150                 a0 =           (A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha ;
 151                 a1 =     -2 * ((A - 1) + (A + 1)*cs                      );
 152                 a2 =           (A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha ;
 153                 break;
 154
 155         case FILTER_HIGH_SHELF:
 156                 b0 =      A * ((A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha);
 157                 b1 = -2 * A * ((A - 1) + (A + 1)*cs                      );
 158                 b2 =      A * ((A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha);
 159                 a0 =           (A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha ;
 160                 a1 =      2 * ((A - 1) - (A + 1)*cs                      );
 161                 a2 =           (A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha ;
 162                 break;
 163
 164         default:
 165                 //unknown filter type
 166                 assert(false);
 167                 break;
 168         }
 169
 170         const float invA0 = 1.0f / a0;
 171         b0 *= invA0;
 172         b1 *= invA0;
 173         b2 *= invA0;
 174         a1 *= invA0;
 175         a2 *= invA0;
 176 }
 177
 178 #ifndef NDEBUG
 179 void Filter::debug()
 180 {
 181         // Feed this to gnuplot to get a graph of the frequency response.
 182         const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
 183         printf("set xrange [2:%f]; ", Fs2);
 184         printf("set yrange [-80:20]; ");
 185         printf("set log x; ");
 186         printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
 187         printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
 188         printf("db(x) = 20*log10(x); ");
 189         printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
 190 }
 191 #endif
 192
 193 void Filter::init(FilterType type, int order)
 194 {
 195         filtertype = type;
 196         filter_order = order;
 197         if (filtertype == FILTER_NONE) filter_order = 0;
 198         if (filter_order == 0) filtertype = FILTER_NONE;
 199
 200         //reset feedback buffer
 201         for (unsigned i = 0; i < filter_order; i++) {
 202                 feedback[i].d0 = feedback[i].d1 = 0.0f;
 203         }
 204 }
 205
 206 #ifdef __SSE__
 207 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
 208 #else
 209 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
 210 #endif
 211 {
 212 #ifdef __SSE__
 213         const unsigned stride = 1;
 214         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 215         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 216 #endif
 217         assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
 218
 219         // Apply the filter FILTER_ORDER times.
 220         for (unsigned j = 0; j < filter_order; j++) {
 221                 float d0 = feedback[j].d0;
 222                 float d1 = feedback[j].d1;
 223                 float *inout_ptr = inout_buf;
 224
 225                 // Render n_samples mono samples. Unrolling manually by a
 226                 // factor four seemingly helps a lot, perhaps because it
 227                 // lets the CPU overlap arithmetic and memory operations
 228                 // better, or perhaps simply because the loop overhead is
 229                 // high.
 230                 for (unsigned i = n_samples >> 2; i; i--) {
 231                         float in, out;
 232
 233                         in = *inout_ptr;
 234                         out = b0*in + d0;
 235                         *inout_ptr = out;
 236                         d0 = b1*in - a1*out + d1;
 237                         d1 = b2*in - a2*out;
 238                         inout_ptr += stride;
 239
 240                         in = *inout_ptr;
 241                         out = b0*in + d0;
 242                         *inout_ptr = out;
 243                         d0 = b1*in - a1*out + d1;
 244                         d1 = b2*in - a2*out;
 245                         inout_ptr += stride;
 246
 247                         in = *inout_ptr;
 248                         out = b0*in + d0;
 249                         *inout_ptr = out;
 250                         d0 = b1*in - a1*out + d1;
 251                         d1 = b2*in - a2*out;
 252                         inout_ptr += stride;
 253
 254                         in = *inout_ptr;
 255                         out = b0*in + d0;
 256                         *inout_ptr = out;
 257                         d0 = b1*in - a1*out + d1;
 258                         d1 = b2*in - a2*out;
 259                         inout_ptr += stride;
 260                 }
 261                 early_undenormalise(d0); //do denormalization step
 262                 early_undenormalise(d1);
 263                 feedback[j].d0 = d0;
 264                 feedback[j].d1 = d1;
 265         }
 266
 267 #ifdef __SSE__
 268         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 269 #endif
 270 }
 271
 272 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
 273 {
 274         //render buf_size mono samples
 275 #ifdef __SSE__
 276         assert(buf_size % 4 == 0);
 277 #endif
 278         if (filter_order == 0)
 279                 return;
 280
 281         this->set_linear_cutoff(cutoff);
 282         this->set_resonance(resonance);
 283         this->update();
 284         this->render_chunk(inout_buf, buf_size);
 285 }
 286
 287 void StereoFilter::init(FilterType type, int new_order)
 288 {
 289 #ifdef __SSE__
 290         parm_filter.init(type, new_order);
 291         memset(feedback, 0, sizeof(feedback));
 292 #else
 293         for (unsigned i = 0; i < 2; ++i) {
 294                 filters[i].init(type, new_order);
 295         }
 296 #endif
 297 }
 298
 299 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance, float dbgain_normalized)
 300 {
 301 #ifdef __SSE__
 302         if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
 303                 return;
 304
 305         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 306         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 307
 308         parm_filter.set_linear_cutoff(cutoff);
 309         parm_filter.set_resonance(resonance);
 310         parm_filter.set_dbgain_normalized(dbgain_normalized);
 311         parm_filter.update();
 312
 313         __m128 b0 = _mm_set1_ps(parm_filter.b0);
 314         __m128 b1 = _mm_set1_ps(parm_filter.b1);
 315         __m128 b2 = _mm_set1_ps(parm_filter.b2);
 316         __m128 a1 = _mm_set1_ps(parm_filter.a1);
 317         __m128 a2 = _mm_set1_ps(parm_filter.a2);
 318
 319         // Apply the filter FILTER_ORDER times.
 320         for (unsigned j = 0; j < parm_filter.filter_order; j++) {
 321                 __m128 d0 = feedback[j].d0;
 322                 __m128 d1 = feedback[j].d1;
 323                 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
 324
 325                 __m128 in = _mm_set1_ps(0.0f), out;
 326                 for (unsigned i = n_samples; i; i--) {
 327                         in = _mm_loadl_pi(in, inout_ptr);
 328                         out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
 329                         _mm_storel_pi(inout_ptr, out);
 330                         d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
 331                         d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
 332                         ++inout_ptr;
 333                 }
 334                 feedback[j].d0 = d0;
 335                 feedback[j].d1 = d1;
 336         }
 337
 338         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 339 #else
 340         if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
 341                 return;
 342
 343         for (unsigned i = 0; i < 2; ++i) {
 344                 filters[i].set_linear_cutoff(cutoff);
 345                 filters[i].set_resonance(resonance);
 346                 filters[i].update();
 347                 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
 348
 349                 ++inout_left_ptr;
 350         }
 351 #endif
 352 }
 353
 354 /*
 355
 356   Find the transfer function for an IIR biquad. This is relatively basic signal
 357   processing, but for completeness, here's the rationale for the function:
 358
 359   The basic system of an IIR biquad looks like this, for input x[n], output y[n]
 360   and constant filter coefficients [ab][0-2]:
 361
 362     a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
 363
 364   Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
 365   DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
 366   due to the linearity and shift properties of the DFT:
 367
 368     a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
 369
 370   Simple factorization and reorganization yields
 371
 372     Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
 373
 374   and Y[w] / X[w] is by definition the filter's _transfer function_
 375   (customarily denoted by H(w)), ie. the complex factor it applies to the
 376   frequency component w. The absolute value of the transfer function is
 377   the frequency response, ie. how much frequency w is boosted or weakened.
 378
 379   (This derivation usually goes via the Z-transform and not the DFT, but the
 380   idea is exactly the same; the Z-transform is just a bit more general.)
 381
 382   Sending a signal through first one filter and then through another one
 383   will naturally be equivalent to a filter with the transfer function equal
 384   to the pointwise multiplication of the two filters, so for N-order filters
 385   we need to raise the answer to the Nth power.
 386
 387 */
 388 complex<double> Filter::evaluate_transfer_function(float omega)
 389 {
 390         complex<float> z = exp(complex<float>(0.0f, omega));
 391         complex<float> z2 = z * z;
 392         return pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
 393 }