X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=fft_convolution_effect.h;fp=fft_convolution_effect.h;h=4136a2a7e2be02c9d88e81a1c2aaf62ab9f62c7d;hb=9e219b96e914b2e5709ba3c3345afdc3cf13a78d;hp=0000000000000000000000000000000000000000;hpb=daca930fb2f404e8800c35715f1fc5c4897d19fb;p=movit
diff --git a/fft_convolution_effect.h b/fft_convolution_effect.h
new file mode 100644
index 0000000..4136a2a
--- /dev/null
+++ b/fft_convolution_effect.h
@@ -0,0 +1,116 @@
+#ifndef _MOVIT_FFT_CONVOLUTION_EFFECT_H
+#define _MOVIT_FFT_CONVOLUTION_EFFECT_H 1
+
+// FFTConvolutionEffect applies an arbitrary 2D convolution between the input image
+// and a convolution kernel (assumed to be much smaller than the image).
+// It does this convolution using multiple smaller FFTs and an algorithm called
+// overlap-discard (also known as overlap-save) to achieve much higher
+// efficiency than direct evaluation of the convolution, at some expense of
+// accuracy.
+//
+// FFTConvolutionEffect follows the usual convention for convolution, which is that
+// you sample from the origin pixel, and then up and to the left from that. This means
+// that (in horizontal 1D) [1 0 0 0 0 ...] would be an identity transform, and that
+// [0 1 0 0 0 ...] would mean sampling one pixel to the left of the origin, which
+// would effectively move the image one pixel to the right.
+//
+// The basic idea of the acceleration comes from the convolution theorem
+// (which holds in any number of dimensions), namely that FFT(A ⊙ B) =
+// FFT(A) * FFT(B), where ⊙ is circular convolution and * is pointwise
+// multiplication. This means that A ⊙ B = IFFT(FFT(A) * FFT(B)), and since
+// we can do a 2D FFT in O(n² log n), this is asymptotically better than
+// direct convolution, which is O(n²m²) (where m is the size of the convolution
+// kernel). However, the convolution theorem is rarely _directly_ applicable,
+// for two reasons:
+//
+// - ⊙ is _circular_ convolution, which means that inputs are taken to
+//   repeat (wrap around), which is rarely what you want.
+// - A and B must be the same size, which means that to convolve a 1280x720
+//   image with a 64x64 kernel, you need to zero pad the 64x64 kernel and
+//   then do _two_ full 1280x720 FFTs (one for each of A and B).
+//
+// The first problem is solved by adding m-1 zero pixels (horizontally
+// and vertically) as padding, and then discarding the result of those pixels.
+// This makes the output identical to that of a non-circular convolution.
+//
+// The second is slightly trickier, and there are multiple ways of solving
+// it. The one that appears to be the most suitable for GPU use,
+// and the one that is used here, is overlap-discard (more commonly but less
+// precisely known as overlap-save). In overlap-discard, the input is broken
+// up into multiple equally-sized slices which are then FFTed and convolved
+// with the kernel individually. (The kernel must still be zero padded to the
+// same size as the slice, but this is typically much smaller than the full
+// picture.) As before, the pad area contains data that's essentially junk,
+// which is thrown away when the slices are put together again.
+//
+// The optimal slice size is a tradeoff. More slices means more space wasted
+// for padding, since the padding is the same no matter the slice size,
+// but fewer slices means we need to do larger FFTs (although fewer of them).
+// There's no exact closed formula for this, especially since the 2D case
+// makes things a bit trickier with the ordering of the X versus Y passes,
+// so we simply try all possible sizes and orderings, attempting to estimate
+// roughly how much each operation will cost. The result isn't perfect, though;
+// FFTW has a mode for actually measuring, which they claim improves speed
+// by ~2x over simple estimation, but they also have much more freedom in
+// their execution model than we do.
+//
+// The output _size_ of a convolution can be defined in a couple of different
+// ways; in a sense, the most reasonable is to use only the central part of
+// the result (the mode “valid” in MATLAB/Octave), since that is the only
+// part not affected by any edge pixels. (FFTConvolutionEffect assumes normal Movit
+// edge pixel behavior, which is to repeat the outermost pixels.) You could also
+// keep all the output pixels (“full” in MATLAB/Octave), which is nicely symmetric.
+// However, for video processing, typically what you want is to have the _same_
+// output size as input size, so we crop to the input size. This means that
+// you'll get some of the edge-affected pixels but not all of them, which is
+// usually an okay tradeoff.
+//
+// FFTConvolutionEffect does not do any actual pixel work by itself; it
+// rewrites itself into a long chain of SliceEffect, FFTPassEffect, FFTInput
+// and ComplexModulationEffect to do its bidding. Note that currently, due to
+// Movit limitations, we need to know the number of FFT passes at finalize()
+// time, which in turn means you cannot change image or kernel size on the fly.
+
+#include <assert.h>
+#include <string>
+
+#include "effect.h"
+#include "fft_input.h"
+
+namespace movit {
+
+class PaddingEffect;
+
+class FFTConvolutionEffect : public Effect {
+public:
+	FFTConvolutionEffect(int input_width, int input_height, int convolve_width, int convolve_height);
+	~FFTConvolutionEffect();
+	virtual std::string effect_type_id() const { return "FFTConvolutionEffect"; }
+	std::string output_fragment_shader() { assert(false); }
+	virtual void rewrite_graph(EffectChain *graph, Node *self);
+
+	// See FFTInput::set_pixel_data().
+	void set_convolution_kernel(const float *pixel_data)
+	{
+		assert(fft_input);
+		fft_input->set_pixel_data(pixel_data);
+	}
+
+private:
+	int input_width, input_height;
+	int convolve_width, convolve_height;
+
+	// Chosen by algorithm.
+	int fft_width, fft_height;
+
+	// Both of these are owned by us if owns_effects is true (before finalize()),
+	// and otherwise owned by the EffectChain.
+	FFTInput *fft_input;
+	PaddingEffect *crop_effect;
+	bool owns_effects;
+};
+
+}  // namespace movit
+
+#endif // !defined(_MOVIT_FFT_CONVOLUTION_EFFECT_H)
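
As a concrete illustration of the overlap-discard scheme described in the header comment, here is a minimal, self-contained 1D sketch. It is not part of movit; a naive O(n²) DFT stands in for a real FFT, and all names and sizes are illustrative only. Each slice overlaps the previous one by m-1 samples, is circularly convolved with the zero-padded kernel in the frequency domain, and the first m-1 wraparound outputs are discarded.

// Minimal 1D overlap-discard (overlap-save) demo; illustrative only.
#include <cmath>
#include <complex>
#include <cstdio>
#include <vector>

using cplx = std::complex<double>;

// Naive DFT/IDFT for clarity; a real implementation would use an FFT.
static std::vector<cplx> dft(const std::vector<cplx> &in, bool inverse)
{
	const size_t n = in.size();
	const double pi = std::acos(-1.0);
	const double sign = inverse ? 1.0 : -1.0;
	std::vector<cplx> out(n);
	for (size_t k = 0; k < n; ++k) {
		cplx sum(0.0, 0.0);
		for (size_t t = 0; t < n; ++t) {
			sum += in[t] * std::polar(1.0, sign * 2.0 * pi * double(k * t) / double(n));
		}
		out[k] = inverse ? sum / double(n) : sum;
	}
	return out;
}

int main()
{
	std::vector<double> signal = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
	std::vector<double> kernel = {0.25, 0.5, 0.25};  // m = 3
	const size_t m = kernel.size();
	const size_t slice = 8;                // FFT size per slice
	const size_t valid = slice - (m - 1);  // output samples kept per slice

	// Zero-pad the kernel to the slice size and transform it once.
	std::vector<cplx> kpad(slice, cplx(0.0, 0.0));
	for (size_t i = 0; i < m; ++i) kpad[i] = kernel[i];
	const std::vector<cplx> kfreq = dft(kpad, false);

	std::vector<double> result(signal.size());
	for (size_t start = 0; start < signal.size(); start += valid) {
		// Build one slice, overlapping the previous one by m-1 samples;
		// out-of-range indices repeat the outermost sample (movit's edge
		// convention, reduced to 1D).
		std::vector<cplx> block(slice);
		for (size_t i = 0; i < slice; ++i) {
			long idx = long(start) + long(i) - long(m - 1);
			if (idx < 0) idx = 0;
			if (idx >= long(signal.size())) idx = long(signal.size()) - 1;
			block[i] = signal[idx];
		}
		std::vector<cplx> conv = dft(block, false);
		for (size_t i = 0; i < slice; ++i) conv[i] *= kfreq[i];
		conv = dft(conv, true);

		// The first m-1 outputs are circular-wraparound junk; discard them.
		for (size_t i = 0; i < valid && start + i < signal.size(); ++i) {
			result[start + i] = conv[m - 1 + i].real();
		}
	}

	for (double v : result) printf("%.3f ", v);
	printf("\n");
	return 0;
}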
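
And a rough usage sketch of the effect itself, assuming the usual movit EffectChain conventions (init_movit(), FlatInput, add_effect(), add_output(), finalize(), render_to_fbo()) and a current OpenGL context. The blur_frame() function, the include paths, the chosen pixel formats, and the kernel layout passed to set_convolution_kernel() (see FFTInput::set_pixel_data() for the expected format) are assumptions for illustration, not taken from this diff.

// Hypothetical usage sketch: convolve an RGBA frame with a 15x15 box kernel.
// Assumes init_movit() has been called and a GL context is current.
#include <epoxy/gl.h>
#include <vector>

#include "effect_chain.h"
#include "fft_convolution_effect.h"
#include "flat_input.h"
#include "image_format.h"

using namespace movit;

void blur_frame(const unsigned char *rgba, unsigned width, unsigned height, GLuint fbo)
{
	// Simple 15x15 box kernel; layout assumed to match FFTInput::set_pixel_data().
	const int kw = 15, kh = 15;
	std::vector<float> kernel(kw * kh, 1.0f / (kw * kh));

	ImageFormat format;
	format.color_space = COLORSPACE_sRGB;
	format.gamma_curve = GAMMA_sRGB;

	EffectChain chain(width, height);  // the arguments form an aspect ratio (nom/denom)

	FlatInput *input = new FlatInput(format, FORMAT_RGBA_POSTMULTIPLIED_ALPHA,
	                                 GL_UNSIGNED_BYTE, width, height);
	input->set_pixel_data(rgba);
	chain.add_input(input);

	FFTConvolutionEffect *conv = new FFTConvolutionEffect(width, height, kw, kh);
	conv->set_convolution_kernel(kernel.data());
	chain.add_effect(conv);

	chain.add_output(format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
	chain.finalize();
	chain.render_to_fbo(fbo, width, height);
}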