#define _FLOW_H 1
// Code for computing optical flow between two images, and using it to interpolate
-// in-between frames. The main user interface is the Interpolate class.
+// in-between frames. The main user interface is the DISComputeFlow and Interpolate
+// classes (also GrayscaleConversion can be useful).
#include <stdint.h>
#include <epoxy/gl.h>
#include <array>
#include <map>
+#include <mutex>
#include <vector>
#include <utility>
class ScopedTimer;
+// Parameter set for one operating point; the predefined operating points from the paper are instances of this struct.
+struct OperatingPoint {
+ unsigned coarsest_level; // TODO: Adjust dynamically based on the resolution?
+ unsigned finest_level; // NOTE(review): presumably pyramid levels, with 0 being full resolution — confirm.
+ unsigned search_iterations; // Halved from the paper.
+ unsigned patch_size_pixels; // Patch size, in pixels.
+ float patch_overlap_ratio; // NOTE(review): presumably the fraction by which neighboring patches overlap — confirm.
+ bool variational_refinement; // Whether variational refinement is enabled.
+
+ // Not part of the original paper; used for interpolation.
+ // NOTE: Values much larger than 1.0 seem to trigger Haswell's “PMA stall”;
+ // the problem is not present on Broadwell and higher (there's a mitigation
+ // in the hardware, but Mesa doesn't enable it at the time of writing).
+ // Since we have hole filling, the holes from 1.0 are not critical,
+ // but larger values seem to do better than hole filling for large
+ // motion, blurs etc. since we have more candidates.
+ float splat_size; // In pixels.
+};
+
+// Operating point 1 (600 Hz on CPU, excluding preprocessing): the fastest of the four presets by quoted CPU rate, and the only one without variational refinement.
+static constexpr OperatingPoint operating_point1 = {
+ 5, // Coarsest level.
+ 3, // Finest level.
+ 8, // Search iterations.
+ 8, // Patch size (pixels).
+ 0.30f, // Overlap ratio.
+ false, // Variational refinement.
+ 1.0f // Splat size (pixels).
+};
+
+// Operating point 2 (300 Hz on CPU, excluding preprocessing): same levels and patch size as operating point 1, but fewer search iterations, more patch overlap, and variational refinement on.
+static constexpr OperatingPoint operating_point2 = {
+ 5, // Coarsest level.
+ 3, // Finest level.
+ 6, // Search iterations.
+ 8, // Patch size (pixels).
+ 0.40f, // Overlap ratio.
+ true, // Variational refinement.
+ 1.0f // Splat size (pixels).
+};
+
+// Operating point 3 (10 Hz on CPU, excluding preprocessing).
+// This is the only one that has been thoroughly tested.
+static constexpr OperatingPoint operating_point3 = {
+ 5, // Coarsest level.
+ 1, // Finest level.
+ 8, // Search iterations.
+ 12, // Patch size (pixels).
+ 0.75f, // Overlap ratio.
+ true, // Variational refinement.
+ 4.0f // Splat size (pixels).
+};
+
+// Operating point 4 (0.5 Hz on CPU, excluding preprocessing): the slowest preset by quoted CPU rate; goes down to finest level 0 with 128 search iterations.
+static constexpr OperatingPoint operating_point4 = {
+ 5, // Coarsest level.
+ 0, // Finest level.
+ 128, // Search iterations.
+ 12, // Patch size (pixels).
+ 0.75f, // Overlap ratio.
+ true, // Variational refinement.
+ 8.0f // Splat size (pixels).
+};
+
+int find_num_levels(int width, int height); // NOTE(review): defined elsewhere; presumably the pyramid level count for a width x height image — confirm.
+
// A class that caches FBOs that render to a given set of textures.
// It never frees anything, so it is only suitable for rendering to
// the same (small) set of textures over and over again.
// Motion search to find the initial flow. See motion_search.frag for documentation.
class MotionSearch {
public:
- MotionSearch();
+ MotionSearch(const OperatingPoint &op);
void exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers);
private:
+ const OperatingPoint op; // Held by value (a copy), not by reference.
PersistentFBOSet<1> fbos;
GLuint motion_vs_obj;
GLuint uniform_inv_image_size, uniform_inv_prev_level_size, uniform_out_flow_size;
GLuint uniform_image_tex, uniform_grad_tex, uniform_flow_tex;
+ GLuint uniform_patch_size, uniform_num_iterations; // NOTE(review): presumably uniform locations driven by op.patch_size_pixels and op.search_iterations — confirm in the .cpp.
};
// Do “densification”, i.e., upsampling of the flow patches to the flow field
// weight in the B channel. Dividing R and G by B gives the normalized values.
class Densify {
public:
- Densify();
+ Densify(const OperatingPoint &op);
void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers);
private:
+ OperatingPoint op; // Held by value. NOTE(review): not const, unlike the op member in MotionSearch, Splat, DISComputeFlow and Interpolate — confirm whether intentional.
PersistentFBOSet<1> fbos;
GLuint densify_vs_obj;
GLuint uniform_scale_factor;
};
+// All operations, except construction and destruction, are thread-safe.
class TexturePool {
public:
GLuint get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers = 0);
bool in_use = false;
bool is_renderbuffer = false;
};
- std::vector<Texture> textures;
+ std::mutex mu;
+ std::vector<Texture> textures; // Under mu.
};
class DISComputeFlow {
public:
- DISComputeFlow(int width, int height);
+ DISComputeFlow(int width, int height, const OperatingPoint &op);
enum FlowDirection {
FORWARD,
GLuint initial_flow_tex;
GLuint vertex_vbo, vao;
TexturePool pool;
+ const OperatingPoint op;
// The various passes.
Sobel sobel;
// radius fills most of the holes.
class Splat {
public:
- Splat();
+ Splat(const OperatingPoint &op);
// alpha is the time of the interpolated frame (0..1).
- void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
+ void exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
private:
+ const OperatingPoint op; // Held by value (a copy), not by reference.
PersistentFBOSetWithDepth<1> fbos;
GLuint splat_vs_obj;
GLuint splat_program;
GLuint uniform_splat_size, uniform_alpha; // NOTE(review): presumably uniform locations fed from op.splat_size and exec()'s alpha — confirm in the .cpp.
- GLuint uniform_image_tex, uniform_flow_tex;
+ GLuint uniform_gray_tex, uniform_flow_tex;
GLuint uniform_inv_flow_size;
};
class Blend {
public:
- Blend();
- void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha);
+ Blend(bool split_ycbcr_output);
+
+ // output2_tex is only used if split_ycbcr_output was true.
+ void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int width, int height, float alpha);
private:
+ bool split_ycbcr_output;
PersistentFBOSet<1> fbos;
+ PersistentFBOSet<2> fbos_split;
GLuint blend_vs_obj;
GLuint blend_fs_obj;
GLuint blend_program;
class Interpolate {
public:
- Interpolate(int width, int height, int flow_level);
+ Interpolate(int width, int height, const OperatingPoint &op, bool split_ycbcr_output);
- // Returns a texture that must be released with release_texture()
- // after use. image_tex must be a two-layer RGBA8 texture with mipmaps
- // (unless flow_level == 0).
- GLuint exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
+ // Returns a texture (or two, if split_ycbcr_output is true) that must
+ // be released with release_texture() after use. image_tex must be a
+ // two-layer RGBA8 texture with mipmaps (unless flow_level == 0).
+ std::pair<GLuint, GLuint> exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
void release_texture(GLuint tex) {
pool.release_texture(tex);
int width, height, flow_level;
GLuint vertex_vbo, vao;
TexturePool pool;
+ const OperatingPoint op;
+ const bool split_ycbcr_output;
Splat splat;
HoleFill hole_fill;