From: Steinar H. Gunderson Date: Sun, 19 Jan 2014 17:27:55 +0000 (+0100) Subject: Add a new effect that can do FFT/IFFT. X-Git-Tag: 1.0~81 X-Git-Url: https://git.sesse.net/?p=movit;a=commitdiff_plain;h=c4f0d4e876a8177db5738596f22349e030e0a1dc;hp=614bef1bce4e94e7774d965b790a1d95b55b81bc Add a new effect that can do FFT/IFFT. --- diff --git a/Makefile.in b/Makefile.in index 78b82fb..73766ac 100644 --- a/Makefile.in +++ b/Makefile.in @@ -49,6 +49,7 @@ TESTED_EFFECTS += padding_effect TESTED_EFFECTS += resample_effect TESTED_EFFECTS += dither_effect TESTED_EFFECTS += deconvolution_sharpen_effect +TESTED_EFFECTS += fft_pass_effect UNTESTED_EFFECTS = sandbox_effect UNTESTED_EFFECTS += mirror_effect diff --git a/fft_pass_effect.cpp b/fft_pass_effect.cpp new file mode 100644 index 0000000..a3de379 --- /dev/null +++ b/fft_pass_effect.cpp @@ -0,0 +1,155 @@ +#include + +#include "fft_pass_effect.h" +#include "effect_util.h" +#include "util.h" + +FFTPassEffect::FFTPassEffect() + : input_width(1280), + input_height(720), + direction(HORIZONTAL) +{ + register_int("fft_size", &fft_size); + register_int("direction", (int *)&direction); + register_int("pass_number", &pass_number); + register_int("inverse", &inverse); + glGenTextures(1, &tex); +} + +FFTPassEffect::~FFTPassEffect() +{ + glDeleteTextures(1, &tex); +} + +std::string FFTPassEffect::output_fragment_shader() +{ + char buf[256]; + sprintf(buf, "#define DIRECTION_VERTICAL %d\n", (direction == VERTICAL)); + return buf + read_file("fft_pass_effect.frag"); +} + +void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num) +{ + Effect::set_gl_state(glsl_program_num, prefix, sampler_num); + + int input_size = (direction == VERTICAL) ? input_height : input_width; + + // See the comments on changes_output_size() in the .h file to see + // why this is legal. It is _needed_ because it counteracts the + // precision issues we get because we sample the input texture with + // normalized coordinates (especially when the repeat count along + // the axis is not a power of two); we very rapidly end up in narrowly + // missing a texel center, which causes precision loss to propagate + // throughout the FFT. + assert(*sampler_num == 1); + glActiveTexture(GL_TEXTURE0); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + check_error(); + + // The memory layout follows figure 5.2 on page 25 of + // http://gpuwave.sesse.net/gpuwave.pdf -- it can be a bit confusing + // at first, but is classically explained more or less as follows: + // + // The classic Cooley-Tukey decimation-in-time FFT algorithm works + // by first splitting input data into odd and even elements + // (e.g. bit-wise xxxxx0 and xxxxx1 for a size-32 FFT), then FFTing + // them separately and combining them using twiddle factors. + // So the outer pass (done _last_) looks only at the last bit, + // and does one such merge pass of sub-size N/2 (FFT size N). + // + // FFT of the first part must then necessarily be split into xxxx00 and + // xxxx10, and similarly xxxx01 and xxxx11 for the other part. Since + // these two FFTs are handled identically, it means we split into xxxx0x + // and xxxx1x, so that the second-outer pass (done second-to-last) + // looks only at the second last bit, and so on. We do two such merge + // passes of sub-size N/4 (sub-FFT size N/2). + // + // Thus, the inner, Nth pass (done first) splits at the first bit, + // so 0 is paired with 16, 1 with 17 and so on, doing N/2 such merge + // passes of sub-size 1 (sub-FFT size 2). We say that the stride is 16. + // The second-inner, (N-1)th pass (done second) splits at the second + // bit, so the stride is 8, and so on. + + assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. + float *tmp = new float[fft_size * 4]; + int subfft_size = 1 << pass_number; + double mulfac; + if (inverse) { + mulfac = 2.0 * M_PI; + } else { + mulfac = -2.0 * M_PI; + } + + assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. + assert(fft_size % subfft_size == 0); + int stride = fft_size / subfft_size; + for (int i = 0; i < fft_size; ++i) { + int k = i / stride; // Element number within this sub-FFT. + int offset = i % stride; // Sub-FFT number. + double twiddle_real, twiddle_imag; + + if (k < subfft_size / 2) { + twiddle_real = cos(mulfac * (k / double(subfft_size))); + twiddle_imag = sin(mulfac * (k / double(subfft_size))); + } else { + // This is mathematically equivalent to the twiddle factor calculations + // in the other branch of the if, but not numerically; the range + // reductions on x87 are not all that precise, and this keeps us within + // [0,pi>. + k -= subfft_size / 2; + twiddle_real = -cos(mulfac * (k / double(subfft_size))); + twiddle_imag = -sin(mulfac * (k / double(subfft_size))); + } + + // The support texture contains everything we need for the FFT: + // Obviously, the twiddle factor (in the Z and W components), but also + // which two samples to fetch. These are stored as normalized + // X coordinate offsets (Y coordinate for a vertical FFT); the reason + // for using offsets and not direct coordinates as in GPUwave + // is that we can have multiple FFTs along the same line, + // and want to reuse the support texture by repeating it. + int base = k * stride * 2 + offset; + int support_texture_index; + if (direction == FFTPassEffect::VERTICAL) { + // Compensate for OpenGL's bottom-left convention. + support_texture_index = fft_size - i - 1; + } else { + support_texture_index = i; + } + tmp[support_texture_index * 4 + 0] = (base - support_texture_index) / double(input_size); + tmp[support_texture_index * 4 + 1] = (base + stride - support_texture_index) / double(input_size); + tmp[support_texture_index * 4 + 2] = twiddle_real; + tmp[support_texture_index * 4 + 3] = twiddle_imag; + } + + glActiveTexture(GL_TEXTURE0 + *sampler_num); + check_error(); + glBindTexture(GL_TEXTURE_1D, tex); + check_error(); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT); + check_error(); + + // Supposedly FFTs are very sensitive to inaccuracies in the twiddle factors, + // at least according to a paper by Schatzman (see gpuwave.pdf reference [30] + // for the full reference), so we keep them at 32-bit. However, for + // small sizes, all components are exact anyway, so we can cheat there + // (although noting that the source coordinates become somewhat less + // accurate then, too). + glTexImage1D(GL_TEXTURE_1D, 0, (subfft_size <= 4) ? GL_RGBA16F : GL_RGBA32F, fft_size, 0, GL_RGBA, GL_FLOAT, tmp); + check_error(); + + delete[] tmp; + + set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num); + ++*sampler_num; + + assert(input_size % fft_size == 0); + set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size); +} diff --git a/fft_pass_effect.frag b/fft_pass_effect.frag new file mode 100644 index 0000000..462a673 --- /dev/null +++ b/fft_pass_effect.frag @@ -0,0 +1,24 @@ +// DIRECTION_VERTICAL will be #defined to 1 if we are doing a vertical FFT, +// and 0 otherwise. + +uniform float PREFIX(num_repeats); +uniform sampler1D PREFIX(support_tex); + +vec4 FUNCNAME(vec2 tc) { +#if DIRECTION_VERTICAL + vec4 support = texture1D(PREFIX(support_tex), tc.y * PREFIX(num_repeats)); + vec4 c1 = INPUT(vec2(tc.x, 1.0 - (tc.y + support.x))); + vec4 c2 = INPUT(vec2(tc.x, 1.0 - (tc.y + support.y))); +#else + vec4 support = texture1D(PREFIX(support_tex), tc.x * PREFIX(num_repeats)); + vec4 c1 = INPUT(vec2(tc.x + support.x, tc.y)); + vec4 c2 = INPUT(vec2(tc.x + support.y, tc.y)); +#endif + // Two complex additions and multiplications in parallel; essentially + // + // result.xy = c1.xy + twiddle * c2.xy + // result.zw = c1.zw + twiddle * c2.zw + // + // where * is complex multiplication. + return c1 + support.z * c2 + support.w * vec4(-c2.y, c2.x, -c2.w, c2.z); +} diff --git a/fft_pass_effect.h b/fft_pass_effect.h new file mode 100644 index 0000000..b3e025b --- /dev/null +++ b/fft_pass_effect.h @@ -0,0 +1,110 @@ +#ifndef _MOVIT_FFT_PASS_EFFECT_H +#define _MOVIT_FFT_PASS_EFFECT_H 1 + +// One pass of a radix-2, in-order, decimation-in-time 1D FFT/IFFT. If you +// connect multiple ones of these together, you will eventually have a complete +// FFT or IFFT. The FFTed data is not so useful for video effects in itself, +// but enables faster convolutions (especially non-separable 2D convolutions) +// than can be done directly, by doing FFT -> multiply -> IFFT. The utilities +// for doing this efficiently will probably be added to Movit at a later date; +// for now, this effect isn't the most useful. +// +// An introduction to FFTs is outside the scope of a file-level comment; see +// http://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#The_radix-2_DIT_case . +// +// The pixels are not really interpreted as pixels, but are interpreted as two +// complex numbers with (real,imaginary) parts stored in (R,G) and (B,A). +// On top of this two-way parallelism, many FFTs are done in parallel (see below). +// +// Implementing a high-performance FFT on the GPU is not easy, especially +// within the demands of Movit filters. (This is one of the places where +// using CUDA or D3D would be easier, as both ship with pre-made and highly +// tuned FFTs.) We don't go to great lengths to get an optimal implementation, +// but rather stay with someting simple. I'll conveniently enough refer to +// my own report on this topic from 2007, namely +// +// Steinar H. Gunderson: “GPUwave: An implementation of the split-step +// Fourier method for the GPU”, http://gpuwave.sesse.net/gpuwave.pdf +// +// Chapter 5 contains the details of the FFT. We follow this rather closely, +// with the exception that in Movit, we only ever draw a single quad, +// so the strategy used in GPUwave with drawing multiple quads with constant +// twiddle factors on them will not be in use here. (It requires some +// benchmarking to find the optimal crossover point anyway.) +// +// Also, we support doing many FFTs along the same axis, so e.g. if you +// have a 128x128 image and ask for a horizontal FFT of size 64, you will +// actually get 256 of them (two wide, 128 high). This is in contrast with +// GPUwave, which only supports them one wide; in a picture setting, +// moving blocks around to create only one block wide FFTs would rapidly +// lead to way too slender textures to be practical (e.g., 1280x720 +// with an FFT of size 64 would be 64x14400 rearranged, and many GPUs +// have limits of 8192 pixels or even 2048 along one dimension). +// +// Note that this effect produces an _unnormalized_ FFT, which means that a +// FFT -> IFFT chain will end up not returning the original data (even modulo +// precision errors) but rather the original data with each element multiplied +// by N, the FFT size. As the FFT and IFFT contribute equally to this energy +// gain, it is recommended that you do the division by N after the FFT but +// before the IFFT. This way, you use the least range possible (for one +// scaling), and as fp16 has quite limited range at times, this can be relevant +// on some GPUs for larger sizes. + +#include +#include +#include + +#include "effect.h" + +class FFTPassEffect : public Effect { +public: + FFTPassEffect(); + ~FFTPassEffect(); + virtual std::string effect_type_id() const { + char buf[256]; + if (inverse) { + snprintf(buf, sizeof(buf), "IFFTPassEffect[%d]", (1 << pass_number)); + } else { + snprintf(buf, sizeof(buf), "FFTPassEffect[%d]", (1 << pass_number)); + } + return buf; + } + std::string output_fragment_shader(); + + void set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num); + + // We don't actually change the output size, but this flag makes sure + // that no other effect is chained after us. This is important since + // we cannot deliver filtered results; any attempt at sampling in-between + // pixels would necessarily give garbage. In addition, we set our sampling + // mode to GL_NEAREST, which other effects are not ready for; so, the + // combination of these two flags guarantee that we're run entirely alone + // in our own phase, which is exactly what we want. + virtual bool needs_texture_bounce() const { return true; } + virtual bool changes_output_size() const { return true; } + + virtual void inform_input_size(unsigned input_num, unsigned width, unsigned height) + { + assert(input_num == 0); + input_width = width; + input_height = height; + } + + virtual void get_output_size(unsigned *width, unsigned *height, + unsigned *virtual_width, unsigned *virtual_height) const { + *width = *virtual_width = input_width; + *height = *virtual_height = input_height; + } + + enum Direction { HORIZONTAL = 0, VERTICAL = 1 }; + +private: + int input_width, input_height; + GLuint tex; + int fft_size; + Direction direction; + int pass_number; // From 1..n. + int inverse; // 0 = forward (FFT), 1 = reverse (IFFT). +}; + +#endif // !defined(_MOVIT_FFT_PASS_EFFECT_H) diff --git a/fft_pass_effect_test.cpp b/fft_pass_effect_test.cpp new file mode 100644 index 0000000..6a6406c --- /dev/null +++ b/fft_pass_effect_test.cpp @@ -0,0 +1,332 @@ +// Unit tests for FFTPassEffect. + +#include + +#include "effect_chain.h" +#include "gtest/gtest.h" +#include "image_format.h" +#include "fft_pass_effect.h" +#include "multiply_effect.h" +#include "test_util.h" + +namespace { + +// Generate a random number uniformly distributed between [-1.0, 1.0]. +float uniform_random() +{ + return 2.0 * ((float)rand() / RAND_MAX - 0.5); +} + +void setup_fft(EffectChain *chain, int fft_size, bool inverse, + bool add_normalizer = false, + FFTPassEffect::Direction direction = FFTPassEffect::HORIZONTAL) +{ + assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. + for (int i = 1, subsize = 2; subsize <= fft_size; ++i, subsize *= 2) { + Effect *fft_effect = chain->add_effect(new FFTPassEffect()); + bool ok = fft_effect->set_int("fft_size", fft_size); + ok |= fft_effect->set_int("pass_number", i); + ok |= fft_effect->set_int("inverse", inverse); + ok |= fft_effect->set_int("direction", direction); + assert(ok); + } + + if (add_normalizer) { + float factor[4] = { 1.0f / fft_size, 1.0f / fft_size, 1.0f / fft_size, 1.0f / fft_size }; + Effect *multiply_effect = chain->add_effect(new MultiplyEffect()); + bool ok = multiply_effect->set_vec4("factor", factor); + assert(ok); + } +} + +void run_fft(const float *in, float *out, int fft_size, bool inverse, + bool add_normalizer = false, + FFTPassEffect::Direction direction = FFTPassEffect::HORIZONTAL) +{ + int width, height; + if (direction == FFTPassEffect::HORIZONTAL) { + width = fft_size; + height = 1; + } else { + width = 1; + height = fft_size; + } + EffectChainTester tester(in, width, height, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR); + setup_fft(tester.get_chain(), fft_size, inverse, add_normalizer, direction); + tester.run(out, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR, OUTPUT_ALPHA_FORMAT_PREMULTIPLIED); +} + +} // namespace + +TEST(FFTPassEffectTest, ZeroStaysZero) { + const int fft_size = 64; + float data[fft_size * 4] = { 0 }; + float out_data[fft_size * 4]; + + run_fft(data, out_data, fft_size, false); + expect_equal(data, out_data, 4, fft_size); + + run_fft(data, out_data, fft_size, true); + expect_equal(data, out_data, 4, fft_size); +} + +TEST(FFTPassEffectTest, Impulse) { + const int fft_size = 64; + float data[fft_size * 4] = { 0 }; + float expected_data[fft_size * 4], out_data[fft_size * 4]; + data[0] = 1.0; + data[1] = 1.2; + data[2] = 1.4; + data[3] = 3.0; + + for (int i = 0; i < fft_size; ++i) { + expected_data[i * 4 + 0] = data[0]; + expected_data[i * 4 + 1] = data[1]; + expected_data[i * 4 + 2] = data[2]; + expected_data[i * 4 + 3] = data[3]; + } + + run_fft(data, out_data, fft_size, false); + expect_equal(expected_data, out_data, 4, fft_size); + + run_fft(data, out_data, fft_size, true); + expect_equal(expected_data, out_data, 4, fft_size); +} + +TEST(FFTPassEffectTest, SingleFrequency) { + const int fft_size = 16; + float data[fft_size * 4] = { 0 }; + float expected_data[fft_size * 4], out_data[fft_size * 4]; + for (int i = 0; i < fft_size; ++i) { + data[i * 4 + 0] = sin(2.0 * M_PI * (4.0 * i) / fft_size); + data[i * 4 + 1] = 0.0; + data[i * 4 + 2] = 0.0; + data[i * 4 + 3] = 0.0; + } + for (int i = 0; i < fft_size; ++i) { + expected_data[i * 4 + 0] = 0.0; + expected_data[i * 4 + 1] = 0.0; + expected_data[i * 4 + 2] = 0.0; + expected_data[i * 4 + 3] = 0.0; + } + expected_data[4 * 4 + 1] = -8.0; + expected_data[12 * 4 + 1] = 8.0; + + run_fft(data, out_data, fft_size, false, false, FFTPassEffect::HORIZONTAL); + expect_equal(expected_data, out_data, 4, fft_size); + + run_fft(data, out_data, fft_size, false, false, FFTPassEffect::VERTICAL); + expect_equal(expected_data, out_data, 4, fft_size); +} + +TEST(FFTPassEffectTest, Repeat) { + const int fft_size = 64; + const int num_repeats = 31; // Prime, to make things more challenging. + float data[num_repeats * fft_size * 4] = { 0 }; + float expected_data[num_repeats * fft_size * 4], out_data[num_repeats * fft_size * 4]; + + srand(12345); + for (int i = 0; i < num_repeats * fft_size * 4; ++i) { + data[i] = uniform_random(); + } + + for (int i = 0; i < num_repeats; ++i) { + run_fft(data + i * fft_size * 4, expected_data + i * fft_size * 4, fft_size, false); + } + + { + // Horizontal. + EffectChainTester tester(data, num_repeats * fft_size, 1, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR); + setup_fft(tester.get_chain(), fft_size, false); + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR, OUTPUT_ALPHA_FORMAT_PREMULTIPLIED); + + expect_equal(expected_data, out_data, 4, num_repeats * fft_size); + } + { + // Vertical. + EffectChainTester tester(data, 1, num_repeats * fft_size, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR); + setup_fft(tester.get_chain(), fft_size, false, false, FFTPassEffect::VERTICAL); + tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR, OUTPUT_ALPHA_FORMAT_PREMULTIPLIED); + + expect_equal(expected_data, out_data, 4, num_repeats * fft_size); + } +} + +TEST(FFTPassEffectTest, TwoDimensional) { // Implicitly tests vertical. + srand(1234); + const int fft_size = 16; + float in[fft_size * fft_size * 4], out[fft_size * fft_size * 4], expected_out[fft_size * fft_size * 4]; + for (int y = 0; y < fft_size; ++y) { + for (int x = 0; x < fft_size; ++x) { + in[(y * fft_size + x) * 4 + 0] = + sin(2.0 * M_PI * (2 * x + 3 * y) / fft_size); + in[(y * fft_size + x) * 4 + 1] = 0.0; + in[(y * fft_size + x) * 4 + 2] = 0.0; + in[(y * fft_size + x) * 4 + 3] = 0.0; + } + } + memset(expected_out, 0, sizeof(expected_out)); + + // This result has been verified using the fft2() function in Octave, + // which uses FFTW. + expected_out[(3 * fft_size + 2) * 4 + 1] = -128.0; + expected_out[(13 * fft_size + 14) * 4 + 1] = 128.0; + + EffectChainTester tester(in, fft_size, fft_size, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR); + setup_fft(tester.get_chain(), fft_size, false, false, FFTPassEffect::HORIZONTAL); + setup_fft(tester.get_chain(), fft_size, false, false, FFTPassEffect::VERTICAL); + tester.run(out, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR, OUTPUT_ALPHA_FORMAT_PREMULTIPLIED); + + expect_equal(expected_out, out, 4 * fft_size, fft_size, 0.25, 0.0005); +} + +// The classic paper for FFT correctness testing is Funda Ergün: +// “Testing Multivariate Linear Functions: Overcoming the Generator Bottleneck” +// (http://www.cs.sfu.ca/~funda/PUBLICATIONS/stoc95.ps), which proves that +// testing three basic properties of FFTs guarantees that the function is +// correct (at least under the assumption that errors are random). +// +// We don't follow the paper directly, though, for a few reasons: First, +// Ergün's paper really considers _self-correcting_ systems, which may +// be stochastically faulty, and thus uses various relatively complicated +// bounds and tests we don't really need. Second, the FFTs it considers +// are all about polynomials over finite fields, which means that results +// are exact and thus easy to test; we work with floats (half-floats!), +// and thus need some error tolerance. +// +// So instead, we follow the implementation of FFTW, which is really the +// gold standard when it comes to FFTs these days. They hard-code 20 +// testing rounds as opposed to the more complicated bounds in the paper, +// and have a simpler version of the third test. +// +// The error bounds are set somewhat empirically, but remember that these +// inputs will give frequency values as large as ~16, where 0.025 is +// within the 9th bit (of 11 total mantissa bits in fp16). +const int ergun_rounds = 20; + +// Test 1: Test that FFT(a + b) = FFT(a) + FFT(b). +TEST(FFTPassEffectTest, ErgunLinearityTest) { + srand(1234); + const int max_fft_size = 64; + float a[max_fft_size * 4], b[max_fft_size * 4], sum[max_fft_size * 4]; + float a_out[max_fft_size * 4], b_out[max_fft_size * 4], sum_out[max_fft_size * 4], expected_sum_out[max_fft_size * 4]; + for (int fft_size = 2; fft_size <= max_fft_size; fft_size *= 2) { + for (int inverse = 0; inverse <= 1; ++inverse) { + for (int i = 0; i < ergun_rounds; ++i) { + for (int j = 0; j < fft_size * 4; ++j) { + a[j] = uniform_random(); + b[j] = uniform_random(); + } + run_fft(a, a_out, fft_size, inverse); + run_fft(b, b_out, fft_size, inverse); + + for (int j = 0; j < fft_size * 4; ++j) { + sum[j] = a[j] + b[j]; + expected_sum_out[j] = a_out[j] + b_out[j]; + } + + run_fft(sum, sum_out, fft_size, inverse); + expect_equal(expected_sum_out, sum_out, 4, fft_size, 0.03, 0.0005); + } + } + } +} + +// Test 2: Test that FFT(delta(i)) = 1 (where delta(i) = [1 0 0 0 ...]), +// or more specifically, test that FFT(a + delta(i)) - FFT(a) = 1. +TEST(FFTPassEffectTest, ErgunImpulseTransform) { + srand(1235); + const int max_fft_size = 64; + float a[max_fft_size * 4], b[max_fft_size * 4]; + float a_out[max_fft_size * 4], b_out[max_fft_size * 4], sum_out[max_fft_size * 4], expected_sum_out[max_fft_size * 4]; + for (int fft_size = 2; fft_size <= max_fft_size; fft_size *= 2) { + for (int inverse = 0; inverse <= 1; ++inverse) { + for (int i = 0; i < ergun_rounds; ++i) { + for (int j = 0; j < fft_size * 4; ++j) { + a[j] = uniform_random(); + + // Compute delta(j) - a. + if (j < 4) { + b[j] = 1.0 - a[j]; + } else { + b[j] = -a[j]; + } + } + run_fft(a, a_out, fft_size, inverse); + run_fft(b, b_out, fft_size, inverse); + + for (int j = 0; j < fft_size * 4; ++j) { + sum_out[j] = a_out[j] + b_out[j]; + expected_sum_out[j] = 1.0; + } + expect_equal(expected_sum_out, sum_out, 4, fft_size, 0.025, 0.0005); + } + } + } +} + +// Test 3: Test the time-shift property of the FFT, in that a circular left-shift +// multiplies the result by e^(j 2pi k/N) (linear phase adjustment). +// As fftw_test.c says, “The paper performs more tests, but this code should be +// fine too”. +TEST(FFTPassEffectTest, ErgunShiftProperty) { + srand(1236); + const int max_fft_size = 64; + float a[max_fft_size * 4], b[max_fft_size * 4]; + float a_out[max_fft_size * 4], b_out[max_fft_size * 4], expected_a_out[max_fft_size * 4]; + for (int fft_size = 2; fft_size <= max_fft_size; fft_size *= 2) { + for (int inverse = 0; inverse <= 1; ++inverse) { + for (int direction = 0; direction <= 1; ++direction) { + for (int i = 0; i < ergun_rounds; ++i) { + for (int j = 0; j < fft_size * 4; ++j) { + a[j] = uniform_random(); + } + + // Circular shift left by one step. + for (int j = 0; j < fft_size * 4; ++j) { + b[j] = a[(j + 4) % (fft_size * 4)]; + } + run_fft(a, a_out, fft_size, inverse, false, FFTPassEffect::Direction(direction)); + run_fft(b, b_out, fft_size, inverse, false, FFTPassEffect::Direction(direction)); + + for (int j = 0; j < fft_size; ++j) { + double s = -sin(j * 2.0 * M_PI / fft_size); + double c = cos(j * 2.0 * M_PI / fft_size); + if (inverse) { + s = -s; + } + + expected_a_out[j * 4 + 0] = b_out[j * 4 + 0] * c - b_out[j * 4 + 1] * s; + expected_a_out[j * 4 + 1] = b_out[j * 4 + 0] * s + b_out[j * 4 + 1] * c; + + expected_a_out[j * 4 + 2] = b_out[j * 4 + 2] * c - b_out[j * 4 + 3] * s; + expected_a_out[j * 4 + 3] = b_out[j * 4 + 2] * s + b_out[j * 4 + 3] * c; + } + expect_equal(expected_a_out, a_out, 4, fft_size, 0.025, 0.0005); + } + } + } + } +} + +TEST(FFTPassEffectTest, BigFFTAccuracy) { + srand(1234); + const int max_fft_size = 2048; + float in[max_fft_size * 4], out[max_fft_size * 4], out2[max_fft_size * 4]; + for (int fft_size = 2; fft_size <= max_fft_size; fft_size *= 2) { + for (int j = 0; j < fft_size * 4; ++j) { + in[j] = uniform_random(); + } + run_fft(in, out, fft_size, false, true); // Forward, with normalization. + run_fft(out, out2, fft_size, true); // Reverse. + + // These error bounds come from + // http://en.wikipedia.org/wiki/Fast_Fourier_transform#Accuracy_and_approximations, + // with empirically estimated epsilons. Note that the calculated + // rms in expect_equal() is divided by sqrt(N), so we compensate + // similarly here. + double max_error = 0.0009 * log2(fft_size); + double rms_limit = 0.0007 * sqrt(log2(fft_size)) / sqrt(fft_size); + expect_equal(in, out2, 4, fft_size, max_error, rms_limit); + } +}