git.sesse.net Git - nageru/blob - filter.cpp

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <assert.h>
   5 #include <complex>
   6 #include <algorithm>
   7
   8 #ifdef __SSE__
   9 #include <mmintrin.h>
  10 #endif
  11
  12 #include "filter.h"
  13
  14 using namespace std;
  15
  16 #ifdef __SSE__
  17
  18 // For SSE, we set the denormals-as-zero flag instead.
  19 #define early_undenormalise(sample)
  20
  21 #else  // !defined(__SSE__)
  22
  23 union uint_float {
  24         float f;
  25         unsigned int i;
  26 };
  27 #define early_undenormalise(sample) { \
  28          uint_float uf; \
  29          uf.f = float(sample); \
  30          if ((uf.i&0x60000000)==0) sample=0.0f; \
  31 }
  32
  33 #endif  // !_defined(__SSE__)
  34
  35 Filter::Filter()
  36 {
  37         omega = M_PI;
  38         resonance = 0.01f;
  39         A = 1.0f;
  40
  41         init(FILTER_NONE, 1);
  42         update();
  43 }
  44
  45 void Filter::update()
  46 {
  47         /*
  48           uses coefficients grabbed from
  49           RBJs audio eq cookbook:
  50           http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  51         */
  52
  53         float sn, cs;
  54         float cutoff_freq = omega;
  55         cutoff_freq = min(cutoff_freq, (float)M_PI);
  56         cutoff_freq = max(cutoff_freq, 0.001f);
  57         calcSinCos(cutoff_freq, &sn, &cs);
  58         if (resonance <= 0) resonance = 0.001f;
  59
  60 #ifdef __GNUC__
  61         // Faster version of real_resonance = resonance ^ (1 / order).
  62         // pow(), at least on current GCC, is pretty slow.
  63         float real_resonance = resonance;
  64         switch (filter_order) {
  65         case 0:
  66         case 1:
  67                 break;
  68         case 4:
  69                 real_resonance = sqrt(real_resonance);
  70                 // Fall through.
  71         case 2:
  72                 real_resonance = sqrt(real_resonance);
  73                 break;
  74         case 3:
  75                 real_resonance = cbrt(real_resonance);
  76                 break;
  77         default:
  78                 assert(false);
  79         }
  80 #else
  81         float real_resonance = pow(resonance, 1.0f / filter_order);
  82 #endif
  83
  84         float alpha = float(sn / (2 * real_resonance));
  85         float a0 = 1 + alpha;
  86         a1 = -2 * cs;
  87         a2 = 1 - alpha;
  88
  89         switch (filtertype) {
  90         case FILTER_NONE:
  91                 a0 = b0 = 1.0f;
  92                 a1 = a2 = b1 = b2 = 0.0; //identity filter
  93                 break;
  94
  95         case FILTER_LPF:
  96                 b0 = (1 - cs) * 0.5f;
  97                 b1 = 1 - cs;
  98                 b2 = b0;
  99                 // a1 = -2*cs;
 100                 // a2 = 1 - alpha;
 101                 break;
 102
 103         case FILTER_HPF:
 104                 b0 = (1 + cs) * 0.5f;
 105                 b1 = -(1 + cs);
 106                 b2 = b0;
 107                 // a1 = -2*cs;
 108                 // a2 = 1 - alpha;
 109                 break;
 110
 111         case FILTER_BPF:
 112                 b0 = alpha;
 113                 b1 = 0.0f;
 114                 b2 = -alpha;
 115                 // a1 = -2*cs;
 116                 // a2 = 1 - alpha;
 117                 break;
 118
 119         case FILTER_NOTCH:
 120                 b0 = 1.0f;
 121                 b1 = -2*cs;
 122                 b2 = 1.0f;
 123                 // a1 = -2*cs;
 124                 // a2 = 1 - alpha;
 125                 break;
 126
 127         case FILTER_APF:
 128                 b0 = 1 - alpha;
 129                 b1 = -2*cs;
 130                 b2 = 1.0f;
 131                 // a1 = -2*cs;
 132                 // a2 = 1 - alpha;
 133                 break;
 134
 135         case FILTER_PEAKING_EQ:
 136                 b0 = 1 + alpha * A;
 137                 b1 = -2*cs;
 138                 b2 = 1 - alpha * A;
 139                 a0 = 1 + alpha / A;
 140                 // a1 = -2*cs;
 141                 a2 = 1 - alpha / A;
 142                 break;
 143
 144         case FILTER_LOW_SHELF:
 145                 b0 =      A * ((A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha);
 146                 b1 =  2 * A * ((A - 1) - (A + 1)*cs                      );
 147                 b2 =      A * ((A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha);
 148                 a0 =           (A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha ;
 149                 a1 =     -2 * ((A - 1) + (A + 1)*cs                      );
 150                 a2 =           (A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha ;
 151                 break;
 152
 153         case FILTER_HIGH_SHELF:
 154                 b0 =      A * ((A + 1) + (A - 1)*cs + 2 * sqrt(A) * alpha);
 155                 b1 = -2 * A * ((A - 1) + (A + 1)*cs                      );
 156                 b2 =      A * ((A + 1) + (A - 1)*cs - 2 * sqrt(A) * alpha);
 157                 a0 =           (A + 1) - (A - 1)*cs + 2 * sqrt(A) * alpha ;
 158                 a1 =      2 * ((A - 1) - (A + 1)*cs                      );
 159                 a2 =           (A + 1) - (A - 1)*cs - 2 * sqrt(A) * alpha ;
 160                 break;
 161
 162         default:
 163                 //unknown filter type
 164                 assert(false);
 165                 break;
 166         }
 167
 168         const float invA0 = 1.0f / a0;
 169         b0 *= invA0;
 170         b1 *= invA0;
 171         b2 *= invA0;
 172         a1 *= invA0;
 173         a2 *= invA0;
 174 }
 175
 176 #ifndef NDEBUG
 177 void Filter::debug()
 178 {
 179         // Feed this to gnuplot to get a graph of the frequency response.
 180         const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
 181         printf("set xrange [2:%f]; ", Fs2);
 182         printf("set yrange [-80:20]; ");
 183         printf("set log x; ");
 184         printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
 185         printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
 186         printf("db(x) = 20*log10(x); ");
 187         printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
 188 }
 189 #endif
 190
 191 void Filter::init(FilterType type, int order)
 192 {
 193         filtertype = type;
 194         filter_order = order;
 195         if (filtertype == FILTER_NONE) filter_order = 0;
 196         if (filter_order == 0) filtertype = FILTER_NONE;
 197
 198         //reset feedback buffer
 199         for (unsigned i = 0; i < filter_order; i++) {
 200                 feedback[i].d0 = feedback[i].d1 = 0.0f;
 201         }
 202 }
 203
 204 #ifdef __SSE__
 205 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
 206 #else
 207 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
 208 #endif
 209 {
 210 #ifdef __SSE__
 211         const unsigned stride = 1;
 212         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 213         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 214 #endif
 215         assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
 216
 217         // Apply the filter FILTER_ORDER times.
 218         for (unsigned j = 0; j < filter_order; j++) {
 219                 float d0 = feedback[j].d0;
 220                 float d1 = feedback[j].d1;
 221                 float *inout_ptr = inout_buf;
 222
 223                 // Render n_samples mono samples. Unrolling manually by a
 224                 // factor four seemingly helps a lot, perhaps because it
 225                 // lets the CPU overlap arithmetic and memory operations
 226                 // better, or perhaps simply because the loop overhead is
 227                 // high.
 228                 for (unsigned i = n_samples >> 2; i; i--) {
 229                         float in, out;
 230
 231                         in = *inout_ptr;
 232                         out = b0*in + d0;
 233                         *inout_ptr = out;
 234                         d0 = b1*in - a1*out + d1;
 235                         d1 = b2*in - a2*out;
 236                         inout_ptr += stride;
 237
 238                         in = *inout_ptr;
 239                         out = b0*in + d0;
 240                         *inout_ptr = out;
 241                         d0 = b1*in - a1*out + d1;
 242                         d1 = b2*in - a2*out;
 243                         inout_ptr += stride;
 244
 245                         in = *inout_ptr;
 246                         out = b0*in + d0;
 247                         *inout_ptr = out;
 248                         d0 = b1*in - a1*out + d1;
 249                         d1 = b2*in - a2*out;
 250                         inout_ptr += stride;
 251
 252                         in = *inout_ptr;
 253                         out = b0*in + d0;
 254                         *inout_ptr = out;
 255                         d0 = b1*in - a1*out + d1;
 256                         d1 = b2*in - a2*out;
 257                         inout_ptr += stride;
 258                 }
 259                 early_undenormalise(d0); //do denormalization step
 260                 early_undenormalise(d1);
 261                 feedback[j].d0 = d0;
 262                 feedback[j].d1 = d1;
 263         }
 264
 265 #ifdef __SSE__
 266         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 267 #endif
 268 }
 269
 270 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
 271 {
 272         //render buf_size mono samples
 273 #ifdef __SSE__
 274         assert(buf_size % 4 == 0);
 275 #endif
 276         if (filter_order == 0)
 277                 return;
 278
 279         this->set_linear_cutoff(cutoff);
 280         this->set_resonance(resonance);
 281         this->update();
 282         this->render_chunk(inout_buf, buf_size);
 283 }
 284
 285 void StereoFilter::init(FilterType type, int new_order)
 286 {
 287 #ifdef __SSE__
 288         parm_filter.init(type, new_order);
 289         memset(feedback, 0, sizeof(feedback));
 290 #else
 291         for (unsigned i = 0; i < 2; ++i) {
 292                 filters[i].init(type, new_order);
 293         }
 294 #endif
 295 }
 296
 297 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance, float dbgain_normalized)
 298 {
 299 #ifdef __SSE__
 300         if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
 301                 return;
 302
 303         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 304         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 305
 306         parm_filter.set_linear_cutoff(cutoff);
 307         parm_filter.set_resonance(resonance);
 308         parm_filter.set_dbgain_normalized(dbgain_normalized);
 309         parm_filter.update();
 310
 311         __m128 b0 = _mm_set1_ps(parm_filter.b0);
 312         __m128 b1 = _mm_set1_ps(parm_filter.b1);
 313         __m128 b2 = _mm_set1_ps(parm_filter.b2);
 314         __m128 a1 = _mm_set1_ps(parm_filter.a1);
 315         __m128 a2 = _mm_set1_ps(parm_filter.a2);
 316
 317         // Apply the filter FILTER_ORDER times.
 318         for (unsigned j = 0; j < parm_filter.filter_order; j++) {
 319                 __m128 d0 = feedback[j].d0;
 320                 __m128 d1 = feedback[j].d1;
 321                 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
 322
 323                 __m128 in = _mm_set1_ps(0.0f), out;
 324                 for (unsigned i = n_samples; i; i--) {
 325                         in = _mm_loadl_pi(in, inout_ptr);
 326                         out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
 327                         _mm_storel_pi(inout_ptr, out);
 328                         d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
 329                         d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
 330                         ++inout_ptr;
 331                 }
 332                 feedback[j].d0 = d0;
 333                 feedback[j].d1 = d1;
 334         }
 335
 336         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 337 #else
 338         if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
 339                 return;
 340
 341         for (unsigned i = 0; i < 2; ++i) {
 342                 filters[i].set_linear_cutoff(cutoff);
 343                 filters[i].set_resonance(resonance);
 344                 filters[i].update();
 345                 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
 346
 347                 ++inout_left_ptr;
 348         }
 349 #endif
 350 }
 351
 352 /*
 353
 354   Find the transfer function for an IIR biquad. This is relatively basic signal
 355   processing, but for completeness, here's the rationale for the function:
 356
 357   The basic system of an IIR biquad looks like this, for input x[n], output y[n]
 358   and constant filter coefficients [ab][0-2]:
 359
 360     a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
 361
 362   Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
 363   DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
 364   due to the linearity and shift properties of the DFT:
 365
 366     a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
 367
 368   Simple factorization and reorganization yields
 369
 370     Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
 371
 372   and Y[w] / X[w] is by definition the filter's _transfer function_
 373   (customarily denoted by H(w)), ie. the complex factor it applies to the
 374   frequency component w. The absolute value of the transfer function is
 375   the frequency response, ie. how much frequency w is boosted or weakened.
 376
 377   (This derivation usually goes via the Z-transform and not the DFT, but the
 378   idea is exactly the same; the Z-transform is just a bit more general.)
 379
 380   Sending a signal through first one filter and then through another one
 381   will naturally be equivalent to a filter with the transfer function equal
 382   to the pointwise multiplication of the two filters, so for N-order filters
 383   we need to raise the answer to the Nth power.
 384
 385 */
 386 complex<double> Filter::evaluate_transfer_function(float omega)
 387 {
 388         complex<float> z = exp(complex<float>(0.0f, omega));
 389         complex<float> z2 = z * z;
 390         return pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
 391 }