git.sesse.net Git - movit/blob - fft_pass_effect.h

   1 #ifndef _MOVIT_FFT_PASS_EFFECT_H
   2 #define _MOVIT_FFT_PASS_EFFECT_H 1
   3
   4 // One pass of a radix-2, in-order, decimation-in-time 1D FFT/IFFT. If you
   5 // connect multiple ones of these together, you will eventually have a complete
   6 // FFT or IFFT. The FFTed data is not so useful for video effects in itself,
   7 // but enables faster convolutions (especially non-separable 2D convolutions)
   8 // than can be done directly, by doing FFT -> multiply -> IFFT. The utilities
   9 // for doing this efficiently will probably be added to Movit at a later date;
  10 // for now, this effect isn't the most useful.
  11 //
  12 // An introduction to FFTs is outside the scope of a file-level comment; see
  13 // http://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#The_radix-2_DIT_case .
  14 //
  15 // The pixels are not really interpreted as pixels, but are interpreted as two
  16 // complex numbers with (real,imaginary) parts stored in (R,G) and (B,A).
  17 // On top of this two-way parallelism, many FFTs are done in parallel (see below).
  18 //
  19 // Implementing a high-performance FFT on the GPU is not easy, especially
  20 // within the demands of Movit filters. (This is one of the places where
  21 // using CUDA or D3D would be easier, as both ship with pre-made and highly
  22 // tuned FFTs.) We don't go to great lengths to get an optimal implementation,
  23 // but rather stay with someting simple. I'll conveniently enough refer to
  24 // my own report on this topic from 2007, namely
  25 //
  26 //    Steinar H. Gunderson: “GPUwave: An implementation of the split-step
  27 //    Fourier method for the GPU”, http://gpuwave.sesse.net/gpuwave.pdf
  28 //
  29 // Chapter 5 contains the details of the FFT. We follow this rather closely,
  30 // with the exception that in Movit, we only ever draw a single quad,
  31 // so the strategy used in GPUwave with drawing multiple quads with constant
  32 // twiddle factors on them will not be in use here. (It requires some
  33 // benchmarking to find the optimal crossover point anyway.)
  34 //
  35 // Also, we support doing many FFTs along the same axis, so e.g. if you
  36 // have a 128x128 image and ask for a horizontal FFT of size 64, you will
  37 // actually get 256 of them (two wide, 128 high). This is in contrast with
  38 // GPUwave, which only supports them one wide; in a picture setting,
  39 // moving blocks around to create only one block wide FFTs would rapidly
  40 // lead to way too slender textures to be practical (e.g., 1280x720
  41 // with an FFT of size 64 would be 64x14400 rearranged, and many GPUs
  42 // have limits of 8192 pixels or even 2048 along one dimension).
  43 //
  44 // Note that this effect produces an _unnormalized_ FFT, which means that a
  45 // FFT -> IFFT chain will end up not returning the original data (even modulo
  46 // precision errors) but rather the original data with each element multiplied
  47 // by N, the FFT size. As the FFT and IFFT contribute equally to this energy
  48 // gain, it is recommended that you do the division by N after the FFT but
  49 // before the IFFT. This way, you use the least range possible (for one
  50 // scaling), and as fp16 has quite limited range at times, this can be relevant
  51 // on some GPUs for larger sizes.
  52
  53 #include <epoxy/gl.h>
  54 #include <assert.h>
  55 #include <stdio.h>
  56 #include <string>
  57
  58 #include "effect.h"
  59
  60 namespace movit {
  61
  62 class FFTPassEffect : public Effect {
  63 public:
  64         FFTPassEffect();
  65         ~FFTPassEffect();
  66         virtual std::string effect_type_id() const {
  67                 char buf[256];
  68                 if (inverse) {
  69                         snprintf(buf, sizeof(buf), "IFFTPassEffect[%d]", (1 << pass_number));
  70                 } else {
  71                         snprintf(buf, sizeof(buf), "FFTPassEffect[%d]", (1 << pass_number));
  72                 }
  73                 return buf;
  74         }
  75         std::string output_fragment_shader();
  76
  77         void set_gl_state(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num);
  78
  79         // We don't actually change the output size, but this flag makes sure
  80         // that no other effect is chained after us. This is important since
  81         // we cannot deliver filtered results; any attempt at sampling in-between
  82         // pixels would necessarily give garbage. In addition, we set our sampling
  83         // mode to GL_NEAREST, which other effects are not ready for; so, the
  84         // combination of these two flags guarantee that we're run entirely alone
  85         // in our own phase, which is exactly what we want.
  86         virtual bool needs_texture_bounce() const { return true; }
  87         virtual bool changes_output_size() const { return true; }
  88         virtual bool sets_virtual_output_size() const { return false; }
  89
  90         virtual void inform_input_size(unsigned input_num, unsigned width, unsigned height)
  91         {
  92                 assert(input_num == 0);
  93                 input_width = width;
  94                 input_height = height;
  95         }
  96
  97         virtual void get_output_size(unsigned *width, unsigned *height,
  98                                      unsigned *virtual_width, unsigned *virtual_height) const {
  99                 *width = *virtual_width = input_width;
 100                 *height = *virtual_height = input_height;
 101         }
 102
 103         virtual void inform_added(EffectChain *chain) { this->chain = chain; }
 104
 105         enum Direction { INVALID = -1, HORIZONTAL = 0, VERTICAL = 1 };
 106
 107 private:
 108         void generate_support_texture();
 109
 110         EffectChain *chain;
 111         int input_width, input_height;
 112         GLuint tex;
 113         float uniform_num_repeats;
 114         GLint uniform_support_tex;
 115
 116         int fft_size;
 117         Direction direction;
 118         int pass_number;  // From 1..n.
 119         int inverse;  // 0 = forward (FFT), 1 = reverse (IFFT).
 120
 121         int last_fft_size;
 122         Direction last_direction;
 123         int last_pass_number;
 124         int last_inverse;
 125         int last_input_size;
 126 };
 127
 128 }  // namespace movit
 129
 130 #endif // !defined(_MOVIT_FFT_PASS_EFFECT_H)