Add support for compute shaders.

[movit] / effect.h
diff --git a/effect.h b/effect.h

index 65fdf522af305491ca8e6abb594355aae9934e15..073780f172bdbc929db64238c1705221264691c4 100644 (file)
--- a/effect.h
+++ b/effect.h
@@ -1,5 +1,5 @@
-#ifndef _EFFECT_H
-#define _EFFECT_H 1
+#ifndef _MOVIT_EFFECT_H
+#define _MOVIT_EFFECT_H 1
  
  // Effect is the base class for every effect. It basically represents a single
  // GLSL function, with an optional set of user-settable parameters.
@@ -10,19 +10,24 @@
  // effect instance; use the macro PREFIX() around your identifiers to
  // automatically prepend that prefix.
  
-#include <GL/glew.h>
+#include <epoxy/gl.h>
  #include <assert.h>
  #include <stddef.h>
  #include <map>
  #include <string>
+#include <vector>
+#include <Eigen/Core>
  
-#include "util.h"
+#include "defs.h"
+
+namespace movit {
  
  class EffectChain;
  class Node;
  
  // Can alias on a float[2].
  struct Point2D {
+       Point2D() {}
         Point2D(float x, float y)
                 : x(x), y(y) {}
  
@@ -31,6 +36,7 @@ struct Point2D {
  
  // Can alias on a float[3].
  struct RGBTriplet {
+       RGBTriplet() {}
         RGBTriplet(float r, float g, float b)
                 : r(r), g(g), b(b) {}
  
@@ -38,13 +44,24 @@ struct RGBTriplet {
  };
  
  // Can alias on a float[4].
-struct RGBATriplet {
-       RGBATriplet(float r, float g, float b, float a)
+struct RGBATuple {
+       RGBATuple() {}
+       RGBATuple(float r, float g, float b, float a)
                 : r(r), g(g), b(b), a(a) {}
  
         float r, g, b, a;
  };
  
+// Represents a registered uniform.
+template<class T>
+struct Uniform {
+       std::string name;  // Without prefix.
+       const T *value;  // Owned by the effect.
+       size_t num_values;  // Number of elements; for arrays only. _Not_ the vector length.
+       std::string prefix;  // Filled in only after phases have been constructed.
+       GLint location;  // Filled in only after phases have been constructed. -1 if no location.
+};
+
  class Effect {
  public:
         virtual ~Effect() {}
@@ -121,7 +138,7 @@ public:
                 // Keeps the type of alpha (premultiplied, postmultiplied, blank)
                 // unchanged from input to output. Usually appropriate if you
                 // process all color channels in a linear fashion, do not change
-               // alpha, and do not produce any new pixels thare have alpha != 1.0.
+               // alpha, and do not produce any new pixels that have alpha != 1.0.
                 //
                 // Does not make sense for inputs.
                 DONT_CARE_ALPHA_TYPE,
@@ -159,12 +176,61 @@ public:
         // needs mipmaps, you will also get them).
         virtual bool needs_mipmaps() const { return false; }
  
+       // Whether there is a direct correspondence between input and output
+       // texels. Specifically, the effect must not:
+       //
+       //   1. Try to sample in the border (i.e., outside the 0.0 to 1.0 area).
+       //   2. Try to sample between texels.
+       //   3. Sample with an x- or y-derivative different from -1 or 1.
+       //      (This also means needs_mipmaps() and one_to_one_sampling()
+       //      together would make no sense.)
+       //
+       // The most common case for this would be an effect that has an exact
+       // 1:1-correspondence between input and output texels, e.g. SaturationEffect.
+       // However, more creative things, like mirroring/flipping or padding,
+       // would also be allowed.
+       //
+       // The primary gain from setting this is that you can sample directly
+       // from an effect that changes output size (see changes_output_size() below),
+       // without going through a bounce texture. It won't work for effects that
+       // set sets_virtual_output_size(), though.
+       //
+       // Does not make a lot of sense together with needs_texture_bounce().
+       virtual bool one_to_one_sampling() const { return false; }
+
         // Whether this effect wants to output to a different size than
-       // its input(s) (see inform_input_size(), below). If you set this to
-       // true, the output will be bounced to a texture (similarly to if the
-       // next effect set needs_texture_bounce()).
+       // its input(s) (see inform_input_size(), below). See also
+       // sets_virtual_output_size() below.
         virtual bool changes_output_size() const { return false; }
  
+       // Whether your get_output_size() function (see below) intends to ever set
+       // virtual_width different from width, or similar for height.
+       // It does not make sense to set this to true if changes_output_size() is false.
+       virtual bool sets_virtual_output_size() const { return changes_output_size(); }
+
+       // Whether this effect is effectively sampling from a single texture.
+       // If so, it will override needs_texture_bounce(); however, there are also
+       // two demands it needs to fulfill:
+       //
+       //  1. It needs to be an Input, i.e. num_inputs() == 0.
+       //  2. It needs to allocate exactly one sampler in set_gl_state(),
+       //     and allow dependent effects to change that sampler state.
+       virtual bool is_single_texture() const { return false; }
+
+       // If set, this effect should never be bounced to an output, even if a
+       // dependent effect demands texture bounce.
+       //
+       // Note that setting this can invoke undefined behavior, up to and including crashing,
+       // so you should only use it if you have deep understanding of your entire chain
+       // and Movit's processing of it. The most likely use case is if you have an input
+       // that's cheap to compute but not a single texture (e.g. YCbCrInput), and want
+       // to run a ResampleEffect directly from it. Normally, this would require a bounce,
+       // but it's faster not to. (However, also note that in this case, effective texel
+       // subpixel precision will be too optimistic, since chroma is already subsampled.)
+       //
+       // Has no effect if is_single_texture() is set.
+       virtual bool override_disable_bounce() const { return false; }
+
         // If changes_output_size() is true, you must implement this to tell
         // the framework what output size you want. Also, you can set a
         // virtual width/height, which is the size the next effect (if any)
@@ -179,6 +245,45 @@ public:
                 assert(false);
         }
  
+       // Whether this effect uses a compute shader instead of a regular fragment shader.
+       // Compute shaders are more flexible in that they can have multiple outputs
+       // for each invocation and also communicate between instances (by using shared
+       // memory within each group), but are not universally supported. The typical
+       // pattern would be to check movit_compute_shaders_supported and rewrite the
+       // graph to use a compute shader effect instead of a regular effect if it is
+       // available, in order to get better performance. Since compute shaders can reuse
+       // loads (again typically through shared memory), using needs_texture_bounce()
+       // is usually not needed, although it is allowed; the best candidates for compute
+       // shaders are typically those that sample many times from their input
+       // but can reuse those loads across neighboring instances.
+       //
+       // Compute shaders commonly work with unnormalized texture coordinates
+       // (where coordinates are integers [0..W) and [0..H)), whereas the rest
+       // of Movit, including any inputs you may want to sample from, works
+       // with normalized coordinates ([0..1)). Movit gives you uniforms
+       // PREFIX(inv_output_size) and PREFIX(output_texcoord_adjust) that you
+       // can use to transform unnormalized to normalized, as well as a macro
+       // NORMALIZE_TEXTURE_COORDS(vec2) that does it for you.
+       //
+       // Since compute shaders have flexible output, it is difficult to chain other
+       // effects after them in the same phase, and thus, they will always be last.
+       // (This limitation may be lifted for the special case of one-to-one effects
+       // in the future.) Furthermore, they cannot write to the framebuffer, just to
+       // textures, so Movit may have to insert an extra phase just to do the output
+       // from a texture to the screen in some cases. However, this is transparent
+       // to both the effect and the user.
+       virtual bool is_compute_shader() const { return false; }
+
+       // For a compute shader (see the previous member function), what dimensions
+       // it should be invoked over. Called every frame, before uniforms are set
+       // (so you are allowed to update uniforms based on this call).
+       virtual void get_compute_dimensions(unsigned output_width, unsigned output_height,
+                                           unsigned *x, unsigned *y, unsigned *z) const {
+               *x = output_width;
+               *y = output_height;
+               *z = 1;
+       }
+
         // Tells the effect the resolution of each of its input.
         // This will be called every frame, and always before get_output_size(),
         // so you can change your output size based on the input if so desired.
@@ -196,6 +301,12 @@ public:
         // if you have several, they will be INPUT1(), INPUT2(), and so on.
         virtual unsigned num_inputs() const { return 1; }
  
+       // Inform the effect that it has been just added to the EffectChain.
+       // The primary use for this is to store the ResourcePool used by
+       // the chain; for modifications to it, rewrite_graph() below
+       // is probably a better fit.
+       virtual void inform_added(EffectChain *chain) {}
+
         // Let the effect rewrite the effect chain as it sees fit.
         // Most effects won't need to do this, but this is very useful
         // if you have an effect that consists of multiple sub-effects
@@ -210,11 +321,6 @@ public:
         // itself from all other effects.
         virtual void rewrite_graph(EffectChain *graph, Node *self) {}
  
-       // Outputs one GLSL uniform declaration for each registered parameter
-       // (see below), with the right prefix prepended to each uniform name.
-       // If you do not want this behavior, you can override this function.
-       virtual std::string output_convenience_uniforms() const;
-
         // Returns the GLSL fragment shader string for this effect.
         virtual std::string output_fragment_shader() = 0;
  
@@ -243,45 +349,76 @@ public:
  protected:
         // Register a parameter. Whenever set_*() is called with the same key,
         // it will update the value in the given pointer (typically a pointer
-       // to some private member variable in your effect).
+       // to some private member variable in your effect). It will also
+       // register a uniform of the same name (plus an arbitrary prefix
+       // which you can access using the PREFIX macro) that you can access.
         //
         // Neither of these take ownership of the pointer.
  
-       // int is special since GLSL pre-1.30 doesn't have integer uniforms.
-       // Thus, ints that you register will _not_ be converted to GLSL uniforms.
+       // These correspond directly to int/float/vec2/vec3/vec4 in GLSL.
         void register_int(const std::string &key, int *value);
-
-       // These correspond directly to float/vec2/vec3/vec4 in GLSL.
         void register_float(const std::string &key, float *value);
         void register_vec2(const std::string &key, float *values);
         void register_vec3(const std::string &key, float *values);
         void register_vec4(const std::string &key, float *values);
  
-       // This will register a 1D texture, which will be bound to a sampler
-       // when your GLSL code runs (so it corresponds 1:1 to a sampler2D uniform
-       // in GLSL).
+       // Register uniforms, such that they will automatically be set
+       // before the shader runs. This is more efficient than set_uniform_*
+       // in effect_util.h, because it doesn't need to do name lookups
+       // every time. Also, in the future, it will use uniform buffer objects
+       // (UBOs) if available to reduce the number of calls into the driver.
         //
-       // Note that if you change the contents of <values>, you will need to
-       // call invalidate_1d_texture() to have the picture re-uploaded on the
-       // next frame. This is in contrast to all the other parameters, which are
-       // set anew every frame.
-       void register_1d_texture(const std::string &key, float *values, size_t size);
-       void invalidate_1d_texture(const std::string &key);
-       
-private:
-       struct Texture1D {
-               float *values;
-               size_t size;
-               bool needs_update;
-               GLuint texture_num;
-       };
+       // May not be called after output_fragment_shader() has returned.
+       // The pointer must be valid for the entire lifetime of the Effect,
+       // since the value is pulled from it each execution. The value is
+       // guaranteed to be read after set_gl_state() for the effect has
+       // returned, so you can safely update its value from there.
+       //
+       // Note that this will also declare the uniform in the shader for you,
+       // so you should not do that yourself. (This is so it can be part of
+       // the right uniform block.) However, it is probably a good idea to
+       // have a commented-out declaration so that it is easier to see the
+       // type and thus understand the shader on its own.
+       //
+       // Calling register_* will automatically imply register_uniform_*,
+       // so there is no need to call both for the same parameter.
+       void register_uniform_sampler2d(const std::string &key, const int *value);
+       void register_uniform_bool(const std::string &key, const bool *value);
+       void register_uniform_int(const std::string &key, const int *value);  // Note: Requires GLSL 1.30 or newer.
+       void register_uniform_float(const std::string &key, const float *value);
+       void register_uniform_vec2(const std::string &key, const float *values);
+       void register_uniform_vec3(const std::string &key, const float *values);
+       void register_uniform_vec4(const std::string &key, const float *values);
+       void register_uniform_float_array(const std::string &key, const float *values, size_t num_values);
+       void register_uniform_vec2_array(const std::string &key, const float *values, size_t num_values);
+       void register_uniform_vec3_array(const std::string &key, const float *values, size_t num_values);
+       void register_uniform_vec4_array(const std::string &key, const float *values, size_t num_values);
+       void register_uniform_mat3(const std::string &key, const Eigen::Matrix3d *matrix);
  
+private:
         std::map<std::string, int *> params_int;
         std::map<std::string, float *> params_float;
         std::map<std::string, float *> params_vec2;
         std::map<std::string, float *> params_vec3;
         std::map<std::string, float *> params_vec4;
-       std::map<std::string, Texture1D> params_tex_1d;
+
+       // Picked out by EffectChain during finalization.
+       std::vector<Uniform<int> > uniforms_image2d;
+       std::vector<Uniform<int> > uniforms_sampler2d;
+       std::vector<Uniform<bool> > uniforms_bool;
+       std::vector<Uniform<int> > uniforms_int;
+       std::vector<Uniform<float> > uniforms_float;
+       std::vector<Uniform<float> > uniforms_vec2;
+       std::vector<Uniform<float> > uniforms_vec3;
+       std::vector<Uniform<float> > uniforms_vec4;
+       std::vector<Uniform<float> > uniforms_float_array;
+       std::vector<Uniform<float> > uniforms_vec2_array;
+       std::vector<Uniform<float> > uniforms_vec3_array;
+       std::vector<Uniform<float> > uniforms_vec4_array;
+       std::vector<Uniform<Eigen::Matrix3d> > uniforms_mat3;
+       friend class EffectChain;
  };
  
-#endif // !defined(_EFFECT_H)
+}  // namespace movit
+
+#endif // !defined(_MOVIT_EFFECT_H)