git.sesse.net Git - nageru/blob - filter.cpp

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <assert.h>
   5 #include <complex>
   6 #include <algorithm>
   7
   8 #ifdef __SSE__
   9 #include <mmintrin.h>
  10 #endif
  11
  12 #include "filter.h"
  13
  14 #ifdef __SSE__
  15
  16 // For SSE, we set the denormals-as-zero flag instead.
  17 #define early_undenormalise(sample)
  18
  19 #else  // !defined(__SSE__)
  20
  21 union uint_float {
  22         float f;
  23         unsigned int i;
  24 };
  25 #define early_undenormalise(sample) { \
  26          uint_float uf; \
  27          uf.f = float(sample); \
  28          if ((uf.i&0x60000000)==0) sample=0.0f; \
  29 }
  30
  31 #endif  // !_defined(__SSE__)
  32
  33 Filter::Filter()
  34 {
  35         omega = M_PI;
  36         resonance = 0.01f;
  37
  38         init(FILTER_NONE, 1);
  39         update();
  40 }
  41
  42 void Filter::update()
  43 {
  44         /*
  45           uses coefficients grabbed from
  46           RBJs audio eq cookbook:
  47           http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  48         */
  49
  50         float sn, cs;
  51         float cutoff_freq = omega;
  52         cutoff_freq = std::min(cutoff_freq, (float)M_PI);
  53         cutoff_freq = std::max(cutoff_freq, 0.001f);
  54         calcSinCos(cutoff_freq, &sn, &cs);
  55         if (resonance <= 0) resonance = 0.001f;
  56
  57 #ifdef __GNUC__
  58         // Faster version of real_resonance = resonance ^ (1 / order).
  59         // pow(), at least on current GCC, is pretty slow.
  60         float real_resonance = resonance;
  61         switch (filter_order) {
  62         case 0:
  63         case 1:
  64                 break;
  65         case 4:
  66                 real_resonance = sqrt(real_resonance);
  67                 // Fall through.
  68         case 2:
  69                 real_resonance = sqrt(real_resonance);
  70                 break;
  71         case 3:
  72                 real_resonance = cbrt(real_resonance);
  73                 break;
  74         default:
  75                 assert(false);
  76         }
  77 #else
  78         float real_resonance = pow(resonance, 1.0f / filter_order);
  79 #endif
  80
  81         float alpha = float(sn / (2 * real_resonance));
  82         float a0 = 1 + alpha;
  83         a1 = -2 * cs;
  84         a2 = 1 - alpha;
  85
  86         switch (filtertype) {
  87         case FILTER_NONE:
  88                 a0 = b0 = 1.0f;
  89                 a1 = a2 = b1 = b2 = 0.0; //identity filter
  90                 break;
  91
  92         case FILTER_LPF:
  93                 b0 = (1 - cs) * 0.5f;
  94                 b1 = 1 - cs;
  95                 b2 = b0;
  96                 // a1 = -2*cs;
  97                 // a2 = 1 - alpha;
  98                 break;
  99
 100         case FILTER_HPF:
 101                 b0 = (1 + cs) * 0.5f;
 102                 b1 = -(1 + cs);
 103                 b2 = b0;
 104                 // a1 = -2*cs;
 105                 // a2 = 1 - alpha;
 106                 break;
 107
 108         case FILTER_BPF:
 109                 b0 = alpha;
 110                 b1 = 0.0f;
 111                 b2 = -alpha;
 112                 // a1 = -2*cs;
 113                 // a2 = 1 - alpha;
 114                 break;
 115
 116         case FILTER_NOTCH:
 117                 b0 = 1.0f;
 118                 b1 = -2*cs;
 119                 b2 = 1.0f;
 120                 // a1 = -2*cs;
 121                 // a2 = 1 - alpha;
 122                 break;
 123
 124         case FILTER_APF:
 125                 b0 = 1 - alpha;
 126                 b1 = -2*cs;
 127                 b2 = 1.0f;
 128                 // a1 = -2*cs;
 129                 // a2 = 1 - alpha;
 130                 break;
 131
 132         default:
 133                 //unknown filter type
 134                 assert(false);
 135                 break;
 136         }
 137
 138         const float invA0 = 1.0f / a0;
 139         b0 *= invA0;
 140         b1 *= invA0;
 141         b2 *= invA0;
 142         a1 *= invA0;
 143         a2 *= invA0;
 144 }
 145
 146 #ifndef NDEBUG
 147 void Filter::debug()
 148 {
 149         // Feed this to gnuplot to get a graph of the frequency response.
 150         const float Fs2 = OUTPUT_FREQUENCY * 0.5f;
 151         printf("set xrange [2:%f]; ", Fs2);
 152         printf("set yrange [-80:20]; ");
 153         printf("set log x; ");
 154         printf("phasor(x) = cos(x*pi/%f)*{1,0} + sin(x*pi/%f)*{0,1}; ", Fs2, Fs2);
 155         printf("tfunc(x, b0, b1, b2, a0, a1, a2) = (b0 * phasor(x)**2 + b1 * phasor(x) + b2) / (a0 * phasor(x)**2 + a1 * phasor(x) + a2); ");
 156         printf("db(x) = 20*log10(x); ");
 157         printf("plot db(abs(tfunc(x, %f, %f, %f, %f, %f, %f))) title \"\"\n", b0, b1, b2, 1.0f, a1, a2);
 158 }
 159 #endif
 160
 161 void Filter::init(FilterType type, int order)
 162 {
 163         filtertype = type;
 164         filter_order = order;
 165         if (filtertype == FILTER_NONE) filter_order = 0;
 166         if (filter_order == 0) filtertype = FILTER_NONE;
 167
 168         //reset feedback buffer
 169         for (unsigned i = 0; i < filter_order; i++) {
 170                 feedback[i].d0 = feedback[i].d1 = 0.0f;
 171         }
 172 }
 173
 174 #ifdef __SSE__
 175 void Filter::render_chunk(float *inout_buf, unsigned int n_samples)
 176 #else
 177 void Filter::render_chunk(float *inout_buf, unsigned int n_samples, unsigned stride)
 178 #endif
 179 {
 180 #ifdef __SSE__
 181         const unsigned stride = 1;
 182         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 183         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 184 #endif
 185         assert((n_samples & 3) == 0); // make sure n_samples is divisible by 4
 186
 187         // Apply the filter FILTER_ORDER times.
 188         for (unsigned j = 0; j < filter_order; j++) {
 189                 float d0 = feedback[j].d0;
 190                 float d1 = feedback[j].d1;
 191                 float *inout_ptr = inout_buf;
 192
 193                 // Render n_samples mono samples. Unrolling manually by a
 194                 // factor four seemingly helps a lot, perhaps because it
 195                 // lets the CPU overlap arithmetic and memory operations
 196                 // better, or perhaps simply because the loop overhead is
 197                 // high.
 198                 for (unsigned i = n_samples >> 2; i; i--) {
 199                         float in, out;
 200
 201                         in = *inout_ptr;
 202                         out = b0*in + d0;
 203                         *inout_ptr = out;
 204                         d0 = b1*in - a1*out + d1;
 205                         d1 = b2*in - a2*out;
 206                         inout_ptr += stride;
 207
 208                         in = *inout_ptr;
 209                         out = b0*in + d0;
 210                         *inout_ptr = out;
 211                         d0 = b1*in - a1*out + d1;
 212                         d1 = b2*in - a2*out;
 213                         inout_ptr += stride;
 214
 215                         in = *inout_ptr;
 216                         out = b0*in + d0;
 217                         *inout_ptr = out;
 218                         d0 = b1*in - a1*out + d1;
 219                         d1 = b2*in - a2*out;
 220                         inout_ptr += stride;
 221
 222                         in = *inout_ptr;
 223                         out = b0*in + d0;
 224                         *inout_ptr = out;
 225                         d0 = b1*in - a1*out + d1;
 226                         d1 = b2*in - a2*out;
 227                         inout_ptr += stride;
 228                 }
 229                 early_undenormalise(d0); //do denormalization step
 230                 early_undenormalise(d1);
 231                 feedback[j].d0 = d0;
 232                 feedback[j].d1 = d1;
 233         }
 234
 235 #ifdef __SSE__
 236         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 237 #endif
 238 }
 239
 240 void Filter::render(float *inout_buf, unsigned int buf_size, float cutoff, float resonance)
 241 {
 242         //render buf_size mono samples
 243 #ifdef __SSE__
 244         assert(buf_size % 4 == 0);
 245 #endif
 246         if (filter_order == 0)
 247                 return;
 248
 249         this->set_linear_cutoff(cutoff);
 250         this->set_resonance(resonance);
 251         this->update();
 252         this->render_chunk(inout_buf, buf_size);
 253 }
 254
 255 void StereoFilter::init(FilterType type, int new_order)
 256 {
 257 #ifdef __SSE__
 258         parm_filter.init(type, new_order);
 259         memset(feedback, 0, sizeof(feedback));
 260 #else
 261         for (unsigned i = 0; i < 2; ++i) {
 262                 filters[i].init(type, 0, new_order, 0);
 263         }
 264 #endif
 265 }
 266
 267 void StereoFilter::render(float *inout_left_ptr, unsigned n_samples, float cutoff, float resonance)
 268 {
 269 #ifdef __SSE__
 270         if (parm_filter.filtertype == FILTER_NONE || parm_filter.filter_order == 0)
 271                 return;
 272
 273         unsigned old_denormals_mode = _MM_GET_FLUSH_ZERO_MODE();
 274         _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
 275
 276         parm_filter.set_linear_cutoff(cutoff);
 277         parm_filter.set_resonance(resonance);
 278         parm_filter.update();
 279
 280         __m128 b0 = _mm_set1_ps(parm_filter.b0);
 281         __m128 b1 = _mm_set1_ps(parm_filter.b1);
 282         __m128 b2 = _mm_set1_ps(parm_filter.b2);
 283         __m128 a1 = _mm_set1_ps(parm_filter.a1);
 284         __m128 a2 = _mm_set1_ps(parm_filter.a2);
 285
 286         // Apply the filter FILTER_ORDER times.
 287         for (unsigned j = 0; j < parm_filter.filter_order; j++) {
 288                 __m128 d0 = feedback[j].d0;
 289                 __m128 d1 = feedback[j].d1;
 290                 __m64 *inout_ptr = (__m64 *)inout_left_ptr;
 291
 292                 __m128 in = _mm_set1_ps(0.0f), out;
 293                 for (unsigned i = n_samples; i; i--) {
 294                         in = _mm_loadl_pi(in, inout_ptr);
 295                         out = _mm_add_ps(_mm_mul_ps(b0, in), d0);
 296                         _mm_storel_pi(inout_ptr, out);
 297                         d0 = _mm_add_ps(_mm_sub_ps(_mm_mul_ps(b1, in), _mm_mul_ps(a1, out)), d1);
 298                         d1 = _mm_sub_ps(_mm_mul_ps(b2, in), _mm_mul_ps(a2, out));
 299                         ++inout_ptr;
 300                 }
 301                 feedback[j].d0 = d0;
 302                 feedback[j].d1 = d1;
 303         }
 304
 305         _MM_SET_FLUSH_ZERO_MODE(old_denormals_mode);
 306 #else
 307         if (filters[0].filtertype == FILTER_NONE || filters[0].filter_order == 0)
 308                 return;
 309
 310         for (unsigned i = 0; i < 2; ++i) {
 311                 filters[i].set_linear_cutoff(cutoff);
 312                 filters[i].set_resonance(resonance);
 313                 filters[i].update();
 314                 filters[i].render_chunk(inout_left_ptr, n_samples, 2);
 315
 316                 ++inout_left_ptr;
 317         }
 318 #endif
 319 }
 320
 321 /*
 322
 323   Find the transfer function for an IIR biquad. This is relatively basic signal
 324   processing, but for completeness, here's the rationale for the function:
 325
 326   The basic system of an IIR biquad looks like this, for input x[n], output y[n]
 327   and constant filter coefficients [ab][0-2]:
 328
 329     a2 y[n-2] + a1 y[n-1] + a0 y[n] = b2 x[n-2] + b1 x[n-1] + b0 x[n]
 330
 331   Taking the discrete Fourier transform (DFT) of both sides (denoting by convention
 332   DFT{x[n]} by X[w], where w is the angular frequency, going from 0 to 2pi), yields,
 333   due to the linearity and shift properties of the DFT:
 334
 335     a2 e^2jw Y[w] + a1 e^jw Y[w] + a0 Y[w] = b2 e^2jw X[w] + b1 e^jw X[w] + b0 Y[w]
 336
 337   Simple factorization and reorganization yields
 338
 339     Y[w] / X[w] = (b1 e^2jw + b1 e^jw + b0) / (a2 e^2jw + a1 e^jw + a0)
 340
 341   and Y[w] / X[w] is by definition the filter's _transfer function_
 342   (customarily denoted by H(w)), ie. the complex factor it applies to the
 343   frequency component w. The absolute value of the transfer function is
 344   the frequency response, ie. how much frequency w is boosted or weakened.
 345
 346   (This derivation usually goes via the Z-transform and not the DFT, but the
 347   idea is exactly the same; the Z-transform is just a bit more general.)
 348
 349   Sending a signal through first one filter and then through another one
 350   will naturally be equivalent to a filter with the transfer function equal
 351   to the pointwise multiplication of the two filters, so for N-order filters
 352   we need to raise the answer to the Nth power.
 353
 354 */
 355 std::complex<double> Filter::evaluate_transfer_function(float omega)
 356 {
 357         std::complex<float> z = exp(std::complex<float>(0.0f, omega));
 358         std::complex<float> z2 = z * z;
 359         return std::pow((b0 * z2 + b1 * z + b2) / (1.0f * z2 + a1 * z + a2), filter_order);
 360 }