git.sesse.net Git - nageru/blob - filter.cpp

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <assert.h>
   5 #include <complex>
   6 #include <algorithm>
   7
   8 #ifdef __SSE__
   9 #include <mmintrin.h>
  10 #endif
  11
  12 #include "filter.h"
  13
  14 using namespace std;
  15
  16 #ifdef __SSE__
  17
  18 // For SSE, we set the denormals-as-zero flag instead.
  19 #define early_undenormalise(sample)
  20
  21 #else  // !defined(__SSE__)
  22
  23 union uint_float {
  24         float f;
  25         unsigned int i;
  26 };
  27 #define early_undenormalise(sample) { \
  28          uint_float uf; \
  29          uf.f = float(sample); \
  30          if ((uf.i&0x60000000)==0) sample=0.0f; \
  31 }
  32
  33 #endif  // !_defined(__SSE__)
  34
  35 Filter::Filter()
  36 {
  37         omega = M_PI;
  38         resonance = 0.01f;
  39
  40         init(FILTER_NONE, 1);
  41         update();
  42 }
  43
  44 void Filter::update()
  45 {
  46         /*
  47           uses coefficients grabbed from
  48           RBJs audio eq cookbook:
  49           http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  50         */
  51
  52         float sn, cs;
  53         float cutoff_freq = omega;
  54         cutoff_freq = min(cutoff_freq, (float)M_PI);
  55         cutoff_freq = max(cutoff_freq, 0.001f);
  56         calcSinCos(cutoff_freq, &sn, &cs);
  57         if (resonance <= 0) resonance = 0.001f;
  58
  59 #ifdef __GNUC__
  60         // Faster version of real_resonance = resonance ^ (1 / order).
  61         // pow(), at least on current GCC, is pretty slow.
  62         float real_resonance = resonance;
  63         switch (filter_order) {
  64         case 0:
  65         case 1:
  66                 break;
  67         case 4:
  68                 real_resonance = sqrt(real_resonance);
  69                 // Fall through.
  70         case 2:
  71                 real_resonance = sqrt(real_resonance);
  72                 break;
  73         case 3:
  74                 real_resonance = cbrt(real_resonance);
  75                 break;
  76         default:
  77                 assert(false);
  78         }
  79 #else
  80         float real_resonance = pow(resonance, 1.0f / filter_order);
  81 #endif
  82
  83         float alpha = float(sn / (2 * real_resonance));
  84         float a0 = 1 + alpha;
  85         a1 = -2 * cs;
  86         a2 = 1 - alpha;
  87
  88         switch (filtertype) {
  89         case FILTER_NONE:
  90                 a0 = b0 = 1.0f;
  91                 a1 = a2 = b1 = b2 = 0.0; //identity filter
  92                 break;
  93
  94         case FILTER_LPF:
  95                 b0 = (1 - cs) * 0.5f;
  96                 b1 = 1 - cs;
  97                 b2 = b0;
  98                 // a1 = -2*cs;
  99                 // a2 = 1 - alpha;
 100                 break;
 101
 102         case FILTER_HPF:
 103                 b0 = (1 + cs) * 0.5f;
 104                 b1 = -(1 + cs);
 105                 b2 = b0;
 106                 // a1 = -2*cs;
 107                 // a2 = 1 - alpha;
 108                 break;
 109
 110         case FILTER_BPF:
 111                 b0 = alpha;
 112                 b1 = 0.0f;
 113                 b2 = -alpha;
 114                 // a1 = -2*cs;
 115                 // a2 = 1 - alpha;
 116                 break;
 117
 118         case FILTER_NOTCH:
 119                 b0 = 1.0f;
 120                 b1 = -2*cs;
 121                 b2 = 1.0f;
 122                 // a1 = -2*cs;
 123                 // a2 = 1 - alpha;
 124                 break;
 125
 126         case FILTER_APF:
 127                 b0 = 1 - alpha;
 128                 b1 = -2*cs;
 129                 b2 = 1.0f;
 130                 // a1 = -2*cs;
 131                 // a2 = 1 - alpha;
 132                 break;
 133
 134         default:
 135                 //unknown filter type
 136                 assert(false);
 137                 break;
 138         }
 139
 140         const float invA0 = 1.0f / a0;
 141         b0 *= invA0;
 142         b1 *= invA0;
 143         b2 *= invA0;
 144         a1 *= invA0;
 145         a2 *= invA0;
 146 }
 147
 148 #ifndef NDEBUG
 149 void Filter::debug()
 150 {
 151         // Feed this to gnuplot to get a graph of the frequency response.
 152         const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
 153         printf("set xrange [2:%f]; ", Fs2);
 154         printf("set yrange [-80:20]; ");
 155         printf("set log x; ");
 156         printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
 157         printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
 158         printf("db(x) = 20*log10(x); ");
 159         printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
 160 }
 161 #endif
 162
 163 void Filter::init(FilterType type, int order)
 164 {
 165         filtertype = type;
 166         filter_order = order;
 167         if (filtertype == FILTER_NONE) filter_order = 0;
 168         if (filter_order == 0) filtertype = FILTER_NONE;
 169
 170         //reset feedback buffer
 171         for (unsigned i = 0; i < filter_order; i++) {
 172                 feedback[i].d0 = feedback[i].d1 = 0.0f;
 173         }
 174 }
 175
 176 #ifdef __SSE__
 177 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
 178 #else
 179 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
 180 #endif
 181 {
 182 #ifdef __SSE__
 183         const unsigned stride = 1;
 184         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 185         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 186 #endif
 187         assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
 188
 189         // Apply the filter FILTER_ORDER times.
 190         for (unsigned j = 0; j < filter_order; j++) {
 191                 float d0 = feedback[j].d0;
 192                 float d1 = feedback[j].d1;
 193                 float *inout_ptr = inout_buf;
 194
 195                 // Render n_samples mono samples. Unrolling manually by a
 196                 // factor four seemingly helps a lot, perhaps because it
 197                 // lets the CPU overlap arithmetic and memory operations
 198                 // better, or perhaps simply because the loop overhead is
 199                 // high.
 200                 for (unsigned i = n_samples >> 2; i; i--) {
 201                         float in, out;
 202
 203                         in = *inout_ptr;
 204                         out = b0*in + d0;
 205                         *inout_ptr = out;
 206                         d0 = b1*in - a1*out + d1;
 207                         d1 = b2*in - a2*out;
 208                         inout_ptr += stride;
 209
 210                         in = *inout_ptr;
 211                         out = b0*in + d0;
 212                         *inout_ptr = out;
 213                         d0 = b1*in - a1*out + d1;
 214                         d1 = b2*in - a2*out;
 215                         inout_ptr += stride;
 216
 217                         in = *inout_ptr;
 218                         out = b0*in + d0;
 219                         *inout_ptr = out;
 220                         d0 = b1*in - a1*out + d1;
 221                         d1 = b2*in - a2*out;
 222                         inout_ptr += stride;
 223
 224                         in = *inout_ptr;
 225                         out = b0*in + d0;
 226                         *inout_ptr = out;
 227                         d0 = b1*in - a1*out + d1;
 228                         d1 = b2*in - a2*out;
 229                         inout_ptr += stride;
 230                 }
 231                 early_undenormalise(d0); //do denormalization step
 232                 early_undenormalise(d1);
 233                 feedback[j].d0 = d0;
 234                 feedback[j].d1 = d1;
 235         }
 236
 237 #ifdef __SSE__
 238         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 239 #endif
 240 }
 241
 242 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
 243 {
 244         //render buf_size mono samples
 245 #ifdef __SSE__
 246         assert(buf_size % 4 == 0);
 247 #endif
 248         if (filter_order == 0)
 249                 return;
 250
 251         this->set_linear_cutoff(cutoff);
 252         this->set_resonance(resonance);
 253         this->update();
 254         this->render_chunk(inout_buf, buf_size);
 255 }
 256
 257 void StereoFilter::init(FilterType type, int new_order)
 258 {
 259 #ifdef __SSE__
 260         parm_filter.init(type, new_order);
 261         memset(feedback, 0, sizeof(feedback));
 262 #else
 263         for (unsigned i = 0; i < 2; ++i) {
 264                 filters[i].init(type, 0, new_order, 0);
 265         }
 266 #endif
 267 }
 268
 269 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance)
 270 {
 271 #ifdef __SSE__
 272         if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
 273                 return;
 274
 275         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 276         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 277
 278         parm_filter.set_linear_cutoff(cutoff);
 279         parm_filter.set_resonance(resonance);
 280         parm_filter.update();
 281
 282         __m128 b0 = _mm_set1_ps(parm_filter.b0);
 283         __m128 b1 = _mm_set1_ps(parm_filter.b1);
 284         __m128 b2 = _mm_set1_ps(parm_filter.b2);
 285         __m128 a1 = _mm_set1_ps(parm_filter.a1);
 286         __m128 a2 = _mm_set1_ps(parm_filter.a2);
 287
 288         // Apply the filter FILTER_ORDER times.
 289         for (unsigned j = 0; j < parm_filter.filter_order; j++) {
 290                 __m128 d0 = feedback[j].d0;
 291                 __m128 d1 = feedback[j].d1;
 292                 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
 293
 294                 __m128 in = _mm_set1_ps(0.0f), out;
 295                 for (unsigned i = n_samples; i; i--) {
 296                         in = _mm_loadl_pi(in, inout_ptr);
 297                         out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
 298                         _mm_storel_pi(inout_ptr, out);
 299                         d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
 300                         d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
 301                         ++inout_ptr;
 302                 }
 303                 feedback[j].d0 = d0;
 304                 feedback[j].d1 = d1;
 305         }
 306
 307         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 308 #else
 309         if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
 310                 return;
 311
 312         for (unsigned i = 0; i < 2; ++i) {
 313                 filters[i].set_linear_cutoff(cutoff);
 314                 filters[i].set_resonance(resonance);
 315                 filters[i].update();
 316                 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
 317
 318                 ++inout_left_ptr;
 319         }
 320 #endif
 321 }
 322
 323 /*
 324
 325   Find the transfer function for an IIR biquad. This is relatively basic signal
 326   processing, but for completeness, here's the rationale for the function:
 327
 328   The basic system of an IIR biquad looks like this, for input x[n], output y[n]
 329   and constant filter coefficients [ab][0-2]:
 330
 331     a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
 332
 333   Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
 334   DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
 335   due to the linearity and shift properties of the DFT:
 336
 337     a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
 338
 339   Simple factorization and reorganization yields
 340
 341     Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
 342
 343   and Y[w] / X[w] is by definition the filter's _transfer function_
 344   (customarily denoted by H(w)), ie. the complex factor it applies to the
 345   frequency component w. The absolute value of the transfer function is
 346   the frequency response, ie. how much frequency w is boosted or weakened.
 347
 348   (This derivation usually goes via the Z-transform and not the DFT, but the
 349   idea is exactly the same; the Z-transform is just a bit more general.)
 350
 351   Sending a signal through first one filter and then through another one
 352   will naturally be equivalent to a filter with the transfer function equal
 353   to the pointwise multiplication of the two filters, so for N-order filters
 354   we need to raise the answer to the Nth power.
 355
 356 */
 357 complex<double> Filter::evaluate_transfer_function(float omega)
 358 {
 359         complex<float> z = exp(complex<float>(0.0f, omega));
 360         complex<float> z2 = z * z;
 361         return pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
 362 }