git.sesse.net Git - nageru/blob - nageru/filter.cpp

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <string.h>
   5 #include <algorithm>
   6 #include <complex>
   7
   8 #include "defs.h"
   9 #include "shared/shared_defs.h"
  10
  11 #ifdef __SSE__
  12 #include <mmintrin.h>
  13 #endif
  14
  15 #include "filter.h"
  16
  17 using namespace std;
  18
  19 #ifdef __SSE__
  20
  21 // For SSE, we set the denormals-as-zero flag instead.
  22 #define early_undenormalise(sample)
  23
  24 #else  // !defined(__SSE__)
  25
  26 union uint_float {
  27         float f;
  28         unsigned int i;
  29 };
  30 #define early_undenormalise(sample) { \
  31          uint_float uf; \
  32          uf.f = float(sample); \
  33          if ((uf.i&0x60000000)==0) sample=0.0f; \
  34 }
  35
  36 #endif  // !_defined(__SSE__)
  37
  38 Filter::Filter()
  39 {
  40         omega = M_PI;
  41         resonance = 0.01f;
  42         A = 1.0f;
  43
  44         init(FILTER_NONE, 1);
  45         update();
  46 }
  47
  48 void Filter::update()
  49 {
  50         /*
  51           uses coefficients grabbed from
  52           RBJs audio eq cookbook:
  53           http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  54         */
  55
  56         float sn, cs;
  57         float cutoff_freq = omega;
  58         cutoff_freq = min(cutoff_freq, (float)M_PI);
  59         cutoff_freq = max(cutoff_freq, 0.001f);
  60         calcSinCos(cutoff_freq, &sn, &cs);
  61         if (resonance <= 0) resonance = 0.001f;
  62
  63 #ifdef __GNUC__
  64         // Faster version of real_resonance = resonance ^ (1 / order).
  65         // pow(), at least on current GCC, is pretty slow.
  66         float real_resonance = resonance;
  67         switch (filter_order) {
  68         case 0:
  69         case 1:
  70                 break;
  71         case 4:
  72                 real_resonance = sqrt(real_resonance);
  73                 // Fall through.
  74         case 2:
  75                 real_resonance = sqrt(real_resonance);
  76                 break;
  77         case 3:
  78                 real_resonance = cbrt(real_resonance);
  79                 break;
  80         default:
  81                 assert(false);
  82         }
  83 #else
  84         float real_resonance = pow(resonance, 1.0f / filter_order);
  85 #endif
  86
  87         float alpha = float(sn / (2 * real_resonance));
  88         float a0 = 1 + alpha;
  89         a1 = -2 * cs;
  90         a2 = 1 - alpha;
  91
  92         switch (filtertype) {
  93         case FILTER_NONE:
  94                 a0 = b0 = 1.0f;
  95                 a1 = a2 = b1 = b2 = 0.0; //identity filter
  96                 break;
  97
  98         case FILTER_LPF:
  99                 b0 = (1 - cs) * 0.5f;
 100                 b1 = 1 - cs;
 101                 b2 = b0;
 102                 // a1 = -2*cs;
 103                 // a2 = 1 - alpha;
 104                 break;
 105
 106         case FILTER_HPF:
 107                 b0 = (1 + cs) * 0.5f;
 108                 b1 = -(1 + cs);
 109                 b2 = b0;
 110                 // a1 = -2*cs;
 111                 // a2 = 1 - alpha;
 112                 break;
 113
 114         case FILTER_BPF:
 115                 b0 = alpha;
 116                 b1 = 0.0f;
 117                 b2 = -alpha;
 118                 // a1 = -2*cs;
 119                 // a2 = 1 - alpha;
 120                 break;
 121
 122         case FILTER_NOTCH:
 123                 b0 = 1.0f;
 124                 b1 = -2*cs;
 125                 b2 = 1.0f;
 126                 // a1 = -2*cs;
 127                 // a2 = 1 - alpha;
 128                 break;
 129
 130         case FILTER_APF:
 131                 b0 = 1 - alpha;
 132                 b1 = -2*cs;
 133                 b2 = 1.0f;
 134                 // a1 = -2*cs;
 135                 // a2 = 1 - alpha;
 136                 break;
 137
 138         case FILTER_PEAKING_EQ:
 139                 b0 = 1 + alpha * A;
 140                 b1 = -2*cs;
 141                 b2 = 1 - alpha * A;
 142                 a0 = 1 + alpha / A;
 143                 // a1 = -2*cs;
 144                 a2 = 1 - alpha / A;
 145                 break;
 146
 147         case FILTER_LOW_SHELF:
 148                 b0 =      A * ((A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha);
 149                 b1 =  2 * A * ((A - 1) - (A + 1)*cs                      );
 150                 b2 =      A * ((A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha);
 151                 a0 =           (A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha ;
 152                 a1 =     -2 * ((A - 1) + (A + 1)*cs                      );
 153                 a2 =           (A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha ;
 154                 break;
 155
 156         case FILTER_HIGH_SHELF:
 157                 b0 =      A * ((A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha);
 158                 b1 = -2 * A * ((A - 1) + (A + 1)*cs                      );
 159                 b2 =      A * ((A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha);
 160                 a0 =           (A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha ;
 161                 a1 =      2 * ((A - 1) - (A + 1)*cs                      );
 162                 a2 =           (A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha ;
 163                 break;
 164
 165         default:
 166                 //unknown filter type
 167                 assert(false);
 168                 break;
 169         }
 170
 171         const float invA0 = 1.0f / a0;
 172         b0 *= invA0;
 173         b1 *= invA0;
 174         b2 *= invA0;
 175         a1 *= invA0;
 176         a2 *= invA0;
 177 }
 178
 179 #ifndef NDEBUG
 180 void Filter::debug()
 181 {
 182         // Feed this to gnuplot to get a graph of the frequency response.
 183         const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
 184         printf("set xrange [2:%f]; ", Fs2);
 185         printf("set yrange [-80:20]; ");
 186         printf("set log x; ");
 187         printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
 188         printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
 189         printf("db(x) = 20*log10(x); ");
 190         printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
 191 }
 192 #endif
 193
 194 void Filter::init(FilterType type, int order)
 195 {
 196         filtertype = type;
 197         filter_order = order;
 198         if (filtertype == FILTER_NONE) filter_order = 0;
 199         if (filter_order == 0) filtertype = FILTER_NONE;
 200
 201         //reset feedback buffer
 202         for (unsigned i = 0; i < filter_order; i++) {
 203                 feedback[i].d0 = feedback[i].d1 = 0.0f;
 204         }
 205 }
 206
 207 #ifdef __SSE__
 208 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
 209 #else
 210 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
 211 #endif
 212 {
 213 #ifdef __SSE__
 214         const unsigned stride = 1;
 215         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 216         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 217 #endif
 218         assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
 219
 220         // Apply the filter FILTER_ORDER times.
 221         for (unsigned j = 0; j < filter_order; j++) {
 222                 float d0 = feedback[j].d0;
 223                 float d1 = feedback[j].d1;
 224                 float *inout_ptr = inout_buf;
 225
 226                 // Render n_samples mono samples. Unrolling manually by a
 227                 // factor four seemingly helps a lot, perhaps because it
 228                 // lets the CPU overlap arithmetic and memory operations
 229                 // better, or perhaps simply because the loop overhead is
 230                 // high.
 231                 for (unsigned i = n_samples >> 2; i; i--) {
 232                         float in, out;
 233
 234                         in = *inout_ptr;
 235                         out = b0*in + d0;
 236                         *inout_ptr = out;
 237                         d0 = b1*in - a1*out + d1;
 238                         d1 = b2*in - a2*out;
 239                         inout_ptr += stride;
 240
 241                         in = *inout_ptr;
 242                         out = b0*in + d0;
 243                         *inout_ptr = out;
 244                         d0 = b1*in - a1*out + d1;
 245                         d1 = b2*in - a2*out;
 246                         inout_ptr += stride;
 247
 248                         in = *inout_ptr;
 249                         out = b0*in + d0;
 250                         *inout_ptr = out;
 251                         d0 = b1*in - a1*out + d1;
 252                         d1 = b2*in - a2*out;
 253                         inout_ptr += stride;
 254
 255                         in = *inout_ptr;
 256                         out = b0*in + d0;
 257                         *inout_ptr = out;
 258                         d0 = b1*in - a1*out + d1;
 259                         d1 = b2*in - a2*out;
 260                         inout_ptr += stride;
 261                 }
 262                 early_undenormalise(d0); //do denormalization step
 263                 early_undenormalise(d1);
 264                 feedback[j].d0 = d0;
 265                 feedback[j].d1 = d1;
 266         }
 267
 268 #ifdef __SSE__
 269         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 270 #endif
 271 }
 272
 273 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
 274 {
 275         //render buf_size mono samples
 276 #ifdef __SSE__
 277         assert(buf_size % 4 == 0);
 278 #endif
 279         if (filter_order == 0)
 280                 return;
 281
 282         this->set_linear_cutoff(cutoff);
 283         this->set_resonance(resonance);
 284         this->update();
 285         this->render_chunk(inout_buf, buf_size);
 286 }
 287
 288 void StereoFilter::init(FilterType type, int new_order)
 289 {
 290 #ifdef __SSE__
 291         parm_filter.init(type, new_order);
 292         memset(feedback, 0, sizeof(feedback));
 293 #else
 294         for (unsigned i = 0; i < 2; ++i) {
 295                 filters[i].init(type, new_order);
 296         }
 297 #endif
 298 }
 299
 300 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance, float dbgain_normalized)
 301 {
 302 #ifdef __SSE__
 303         if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
 304                 return;
 305
 306         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 307         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 308
 309         parm_filter.set_linear_cutoff(cutoff);
 310         parm_filter.set_resonance(resonance);
 311         parm_filter.set_dbgain_normalized(dbgain_normalized);
 312         parm_filter.update();
 313
 314         __m128 b0 = _mm_set1_ps(parm_filter.b0);
 315         __m128 b1 = _mm_set1_ps(parm_filter.b1);
 316         __m128 b2 = _mm_set1_ps(parm_filter.b2);
 317         __m128 a1 = _mm_set1_ps(parm_filter.a1);
 318         __m128 a2 = _mm_set1_ps(parm_filter.a2);
 319
 320         // Apply the filter FILTER_ORDER times.
 321         for (unsigned j = 0; j < parm_filter.filter_order; j++) {
 322                 __m128 d0 = feedback[j].d0;
 323                 __m128 d1 = feedback[j].d1;
 324                 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
 325
 326                 __m128 in = _mm_set1_ps(0.0f), out;
 327                 for (unsigned i = n_samples; i; i--) {
 328                         in = _mm_loadl_pi(in, inout_ptr);
 329                         out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
 330                         _mm_storel_pi(inout_ptr, out);
 331                         d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
 332                         d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
 333                         ++inout_ptr;
 334                 }
 335                 feedback[j].d0 = d0;
 336                 feedback[j].d1 = d1;
 337         }
 338
 339         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 340 #else
 341         if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
 342                 return;
 343
 344         for (unsigned i = 0; i < 2; ++i) {
 345                 filters[i].set_linear_cutoff(cutoff);
 346                 filters[i].set_resonance(resonance);
 347                 filters[i].update();
 348                 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
 349
 350                 ++inout_left_ptr;
 351         }
 352 #endif
 353 }
 354
 355 /*
 356
 357   Find the transfer function for an IIR biquad. This is relatively basic signal
 358   processing, but for completeness, here's the rationale for the function:
 359
 360   The basic system of an IIR biquad looks like this, for input x[n], output y[n]
 361   and constant filter coefficients [ab][0-2]:
 362
 363     a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
 364
 365   Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
 366   DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
 367   due to the linearity and shift properties of the DFT:
 368
 369     a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
 370
 371   Simple factorization and reorganization yields
 372
 373     Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
 374
 375   and Y[w] / X[w] is by definition the filter's _transfer function_
 376   (customarily denoted by H(w)), ie. the complex factor it applies to the
 377   frequency component w. The absolute value of the transfer function is
 378   the frequency response, ie. how much frequency w is boosted or weakened.
 379
 380   (This derivation usually goes via the Z-transform and not the DFT, but the
 381   idea is exactly the same; the Z-transform is just a bit more general.)
 382
 383   Sending a signal through first one filter and then through another one
 384   will naturally be equivalent to a filter with the transfer function equal
 385   to the pointwise multiplication of the two filters, so for N-order filters
 386   we need to raise the answer to the Nth power.
 387
 388 */
 389 complex<double> Filter::evaluate_transfer_function(float omega)
 390 {
 391         complex<float> z = exp(complex<float>(0.0f, omega));
 392         complex<float> z2 = z * z;
 393         return pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
 394 }