From 25a6ed1ae58346a868e583b25a1003807f65490a Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 21 Aug 2018 00:34:58 +0200 Subject: [PATCH] Move flow classes into a header file; first step on the way to making it accessible. --- flow.cpp | 471 +---------------------------------------------------- flow.h | 487 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 488 insertions(+), 470 deletions(-) create mode 100644 flow.h diff --git a/flow.cpp b/flow.cpp index b673b9d..81130e2 100644 --- a/flow.cpp +++ b/flow.cpp @@ -15,6 +15,7 @@ #include #include +#include "flow.h" #include "gpu_timers.h" #include "util.h" @@ -235,36 +236,6 @@ void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint te glProgramUniform1i(program, location, texture_unit); } -// A class that caches FBOs that render to a given set of textures. -// It never frees anything, so it is only suitable for rendering to -// the same (small) set of textures over and over again. -template -class PersistentFBOSet { -public: - void render_to(const array &textures); - - // Convenience wrappers. - void render_to(GLuint texture0) { - render_to({{texture0}}); - } - - void render_to(GLuint texture0, GLuint texture1) { - render_to({{texture0, texture1}}); - } - - void render_to(GLuint texture0, GLuint texture1, GLuint texture2) { - render_to({{texture0, texture1, texture2}}); - } - - void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) { - render_to({{texture0, texture1, texture2, texture3}}); - } - -private: - // TODO: Delete these on destruction. - map, GLuint> fbos; -}; - template void PersistentFBOSet::render_to(const array &textures) { @@ -287,34 +258,6 @@ void PersistentFBOSet::render_to(const array glBindFramebuffer(GL_FRAMEBUFFER, fbo); } -// Same, but with a depth texture. -template -class PersistentFBOSetWithDepth { -public: - void render_to(GLuint depth_rb, const array &textures); - - // Convenience wrappers. - void render_to(GLuint depth_rb, GLuint texture0) { - render_to(depth_rb, {{texture0}}); - } - - void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1) { - render_to(depth_rb, {{texture0, texture1}}); - } - - void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2) { - render_to(depth_rb, {{texture0, texture1, texture2}}); - } - - void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) { - render_to(depth_rb, {{texture0, texture1, texture2, texture3}}); - } - -private: - // TODO: Delete these on destruction. - map>, GLuint> fbos; -}; - template void PersistentFBOSetWithDepth::render_to(GLuint depth_rb, const array &textures) { @@ -340,22 +283,6 @@ void PersistentFBOSetWithDepth::render_to(GLuint depth_rb, const a glBindFramebuffer(GL_FRAMEBUFFER, fbo); } -// Convert RGB to grayscale, using Rec. 709 coefficients. -class GrayscaleConversion { -public: - GrayscaleConversion(); - void exec(GLint tex, GLint gray_tex, int width, int height, int num_layers); - -private: - PersistentFBOSet<1> fbos; - GLuint gray_vs_obj; - GLuint gray_fs_obj; - GLuint gray_program; - GLuint gray_vao; - - GLuint uniform_tex; -}; - GrayscaleConversion::GrayscaleConversion() { gray_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -385,28 +312,6 @@ void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Compute gradients in every point, used for the motion search. -// The DIS paper doesn't actually mention how these are computed, -// but seemingly, a 3x3 Sobel operator is used here (at least in -// later versions of the code), while a [1 -8 0 8 -1] kernel is -// used for all the derivatives in the variational refinement part -// (which borrows code from DeepFlow). This is inconsistent, -// but I guess we're better off with staying with the original -// decisions until we actually know having different ones would be better. -class Sobel { -public: - Sobel(); - void exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers); - -private: - PersistentFBOSet<1> fbos; - GLuint sobel_vs_obj; - GLuint sobel_fs_obj; - GLuint sobel_program; - - GLuint uniform_tex; -}; - Sobel::Sobel() { sobel_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -427,23 +332,6 @@ void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_heig glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Motion search to find the initial flow. See motion_search.frag for documentation. -class MotionSearch { -public: - MotionSearch(); - void exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers); - -private: - PersistentFBOSet<1> fbos; - - GLuint motion_vs_obj; - GLuint motion_fs_obj; - GLuint motion_search_program; - - GLuint uniform_inv_image_size, uniform_inv_prev_level_size, uniform_out_flow_size; - GLuint uniform_image_tex, uniform_grad_tex, uniform_flow_tex; -}; - MotionSearch::MotionSearch() { motion_vs_obj = compile_shader(read_file("motion_search.vert"), GL_VERTEX_SHADER); @@ -475,30 +363,6 @@ void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuin glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Do “densification”, ie., upsampling of the flow patches to the flow field -// (the same size as the image at this level). We draw one quad per patch -// over its entire covered area (using instancing in the vertex shader), -// and then weight the contributions in the pixel shader by post-warp difference. -// This is equation (3) in the paper. -// -// We accumulate the flow vectors in the R/G channels (for u/v) and the total -// weight in the B channel. Dividing R and G by B gives the normalized values. -class Densify { -public: - Densify(); - void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers); - -private: - PersistentFBOSet<1> fbos; - - GLuint densify_vs_obj; - GLuint densify_fs_obj; - GLuint densify_program; - - GLuint uniform_patch_size; - GLuint uniform_image_tex, uniform_flow_tex; -}; - Densify::Densify() { densify_vs_obj = compile_shader(read_file("densify.vert"), GL_VERTEX_SHADER); @@ -530,29 +394,6 @@ void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches * num_layers); } -// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of -// I_0 and I_w. The prewarping is what enables us to solve the variational -// flow for du,dv instead of u,v. -// -// Also calculates the normalized flow, ie. divides by z (this is needed because -// Densify works by additive blending) and multiplies by the image size. -// -// See variational_refinement.txt for more information. -class Prewarp { -public: - Prewarp(); - void exec(GLuint tex_view, GLuint flow_tex, GLuint normalized_flow_tex, GLuint I_tex, GLuint I_t_tex, int level_width, int level_height, int num_layers); - -private: - PersistentFBOSet<3> fbos; - - GLuint prewarp_vs_obj; - GLuint prewarp_fs_obj; - GLuint prewarp_program; - - GLuint uniform_image_tex, uniform_flow_tex; -}; - Prewarp::Prewarp() { prewarp_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -576,29 +417,6 @@ void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_te glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// From I, calculate the partial derivatives I_x and I_y. We use a four-tap -// central difference filter, since apparently, that's tradition (I haven't -// measured quality versus a more normal 0.5 (I[x+1] - I[x-1]).) -// The coefficients come from -// -// https://en.wikipedia.org/wiki/Finite_difference_coefficient -// -// Also computes β_0, since it depends only on I_x and I_y. -class Derivatives { -public: - Derivatives(); - void exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers); - -private: - PersistentFBOSet<2> fbos; - - GLuint derivatives_vs_obj; - GLuint derivatives_fs_obj; - GLuint derivatives_program; - - GLuint uniform_tex; -}; - Derivatives::Derivatives() { derivatives_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -620,28 +438,6 @@ void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, in glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Calculate the diffusivity for each pixels, g(x,y). Smoothness (s) will -// be calculated in the shaders on-the-fly by sampling in-between two -// neighboring g(x,y) pixels, plus a border tweak to make sure we get -// zero smoothness at the border. -// -// See variational_refinement.txt for more information. -class ComputeDiffusivity { -public: - ComputeDiffusivity(); - void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers); - -private: - PersistentFBOSet<1> fbos; - - GLuint diffusivity_vs_obj; - GLuint diffusivity_fs_obj; - GLuint diffusivity_program; - - GLuint uniform_flow_tex, uniform_diff_flow_tex; - GLuint uniform_alpha, uniform_zero_diff_flow; -}; - ComputeDiffusivity::ComputeDiffusivity() { diffusivity_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -670,44 +466,6 @@ void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diff glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Set up the equations set (two equations in two unknowns, per pixel). -// We store five floats; the three non-redundant elements of the 2x2 matrix (A) -// as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit -// floats. (Actually, we store the inverse of the diagonal elements, because -// we only ever need to divide by them.) This fits into four u32 values; -// R, G, B for the matrix (the last element is symmetric) and A for the two b values. -// All the values of the energy term (E_I, E_G, E_S), except the smoothness -// terms that depend on other pixels, are calculated in one pass. -// -// The equation set is split in two; one contains only the pixels needed for -// the red pass, and one only for the black pass (see sor.frag). This reduces -// the amount of data the SOR shader has to pull in, at the cost of some -// complexity when the equation texture ends up with half the size and we need -// to adjust texture coordinates. The contraction is done along the horizontal -// axis, so that on even rows (0, 2, 4, ...), the “red” texture will contain -// pixels 0, 2, 4, 6, etc., and on odd rows 1, 3, 5, etc.. -// -// See variational_refinement.txt for more information about the actual -// equations in use. -class SetupEquations { -public: - SetupEquations(); - void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers); - -private: - PersistentFBOSet<2> fbos; - - GLuint equations_vs_obj; - GLuint equations_fs_obj; - GLuint equations_program; - - GLuint uniform_I_x_y_tex, uniform_I_t_tex; - GLuint uniform_diff_flow_tex, uniform_base_flow_tex; - GLuint uniform_beta_0_tex; - GLuint uniform_diffusivity_tex; - GLuint uniform_gamma, uniform_delta, uniform_zero_diff_flow; -}; - SetupEquations::SetupEquations() { equations_vs_obj = compile_shader(read_file("equations.vert"), GL_VERTEX_SHADER); @@ -745,28 +503,6 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Actually solve the equation sets made by SetupEquations, by means of -// successive over-relaxation (SOR). -// -// See variational_refinement.txt for more information. -class SOR { -public: - SOR(); - void exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer); - -private: - PersistentFBOSet<1> fbos; - - GLuint sor_vs_obj; - GLuint sor_fs_obj; - GLuint sor_program; - - GLuint uniform_diff_flow_tex; - GLuint uniform_equation_red_tex, uniform_equation_black_tex; - GLuint uniform_diffusivity_tex; - GLuint uniform_phase, uniform_num_nonzero_phases; -}; - SOR::SOR() { sor_vs_obj = compile_shader(read_file("sor.vert"), GL_VERTEX_SHADER); @@ -829,23 +565,6 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl } } -// Simply add the differential flow found by the variational refinement to the base flow. -// The output is in base_flow_tex; we don't need to make a new texture. -class AddBaseFlow { -public: - AddBaseFlow(); - void exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers); - -private: - PersistentFBOSet<1> fbos; - - GLuint add_flow_vs_obj; - GLuint add_flow_fs_obj; - GLuint add_flow_program; - - GLuint uniform_diff_flow_tex; -}; - AddBaseFlow::AddBaseFlow() { add_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -869,23 +588,6 @@ void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_wid glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -// Take a copy of the flow, bilinearly interpolated and scaled up. -class ResizeFlow { -public: - ResizeFlow(); - void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers); - -private: - PersistentFBOSet<1> fbos; - - GLuint resize_flow_vs_obj; - GLuint resize_flow_fs_obj; - GLuint resize_flow_program; - - GLuint uniform_flow_tex; - GLuint uniform_scale_factor; -}; - ResizeFlow::ResizeFlow() { resize_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -911,65 +613,6 @@ void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int inpu glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers); } -class TexturePool { -public: - GLuint get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers = 0); - void release_texture(GLuint tex_num); - GLuint get_renderbuffer(GLenum format, GLuint width, GLuint height); - void release_renderbuffer(GLuint tex_num); - -private: - struct Texture { - GLuint tex_num; - GLenum format; - GLuint width, height, num_layers; - bool in_use = false; - bool is_renderbuffer = false; - }; - vector textures; -}; - -class DISComputeFlow { -public: - DISComputeFlow(int width, int height); - - enum FlowDirection { - FORWARD, - FORWARD_AND_BACKWARD - }; - enum ResizeStrategy { - DO_NOT_RESIZE_FLOW, - RESIZE_FLOW_TO_FULL_SIZE - }; - - // The texture must have two layers (first and second frame). - // Returns a texture that must be released with release_texture() - // after use. - GLuint exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy); - - void release_texture(GLuint tex) { - pool.release_texture(tex); - } - -private: - int width, height; - GLuint initial_flow_tex; - GLuint vertex_vbo, vao; - TexturePool pool; - - // The various passes. - Sobel sobel; - MotionSearch motion_search; - Densify densify; - Prewarp prewarp; - Derivatives derivatives; - ComputeDiffusivity compute_diffusivity; - SetupEquations setup_equations; - SOR sor; - AddBaseFlow add_base_flow; - ResizeFlow resize_flow; -}; - DISComputeFlow::DISComputeFlow(int width, int height) : width(width), height(height) { @@ -1199,27 +842,6 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra } } -// Forward-warp the flow half-way (or rather, by alpha). A non-zero “splatting” -// radius fills most of the holes. -class Splat { -public: - Splat(); - - // alpha is the time of the interpolated frame (0..1). - void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha); - -private: - PersistentFBOSetWithDepth<1> fbos; - - GLuint splat_vs_obj; - GLuint splat_fs_obj; - GLuint splat_program; - - GLuint uniform_splat_size, uniform_alpha; - GLuint uniform_image_tex, uniform_flow_tex; - GLuint uniform_inv_flow_size; -}; - Splat::Splat() { splat_vs_obj = compile_shader(read_file("splat.vert"), GL_VERTEX_SHADER); @@ -1267,39 +889,6 @@ void Splat::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_te glDisable(GL_DEPTH_TEST); } -// Doing good and fast hole-filling on a GPU is nontrivial. We choose an option -// that's fairly simple (given that most holes are really small) and also hopefully -// cheap should the holes not be so small. Conceptually, we look for the first -// non-hole to the left of us (ie., shoot a ray until we hit something), then -// the first non-hole to the right of us, then up and down, and then average them -// all together. It's going to create “stars” if the holes are big, but OK, that's -// a tradeoff. -// -// Our implementation here is efficient assuming that the hierarchical Z-buffer is -// on even for shaders that do discard (this typically kills early Z, but hopefully -// not hierarchical Z); we set up Z so that only holes are written to, which means -// that as soon as a hole is filled, the rasterizer should just skip it. Most of the -// fullscreen quads should just be discarded outright, really. -class HoleFill { -public: - HoleFill(); - - // Output will be in flow_tex, temp_tex[0, 1, 2], representing the filling - // from the down, left, right and up, respectively. Use HoleBlend to merge - // them into one. - void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height); - -private: - PersistentFBOSetWithDepth<1> fbos; - - GLuint fill_vs_obj; - GLuint fill_fs_obj; - GLuint fill_program; - - GLuint uniform_tex; - GLuint uniform_z, uniform_sample_offset; -}; - HoleFill::HoleFill() { fill_vs_obj = compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER); @@ -1364,25 +953,6 @@ void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int wi glDisable(GL_DEPTH_TEST); } -// Blend the four directions from HoleFill into one pixel, so that single-pixel -// holes become the average of their four neighbors. -class HoleBlend { -public: - HoleBlend(); - - void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height); - -private: - PersistentFBOSetWithDepth<1> fbos; - - GLuint blend_vs_obj; - GLuint blend_fs_obj; - GLuint blend_program; - - GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex; - GLuint uniform_z, uniform_sample_offset; -}; - HoleBlend::HoleBlend() { blend_vs_obj = compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER); // Reuse the vertex shader from the fill. @@ -1421,21 +991,6 @@ void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int w glDisable(GL_DEPTH_TEST); } -class Blend { -public: - Blend(); - void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha); - -private: - PersistentFBOSet<1> fbos; - GLuint blend_vs_obj; - GLuint blend_fs_obj; - GLuint blend_program; - - GLuint uniform_image_tex, uniform_flow_tex; - GLuint uniform_alpha, uniform_flow_consistency_tolerance; -}; - Blend::Blend() { blend_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); @@ -1461,30 +1016,6 @@ void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int level glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -class Interpolate { -public: - Interpolate(int width, int height, int flow_level); - - // Returns a texture that must be released with release_texture() - // after use. image_tex must be a two-layer RGBA8 texture with mipmaps - // (unless flow_level == 0). - GLuint exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha); - - void release_texture(GLuint tex) { - pool.release_texture(tex); - } - -private: - int width, height, flow_level; - GLuint vertex_vbo, vao; - TexturePool pool; - - Splat splat; - HoleFill hole_fill; - HoleBlend hole_blend; - Blend blend; -}; - Interpolate::Interpolate(int width, int height, int flow_level) : width(width), height(height), flow_level(flow_level) { // Set up the vertex data that will be shared between all passes. diff --git a/flow.h b/flow.h new file mode 100644 index 0000000..d8a80a4 --- /dev/null +++ b/flow.h @@ -0,0 +1,487 @@ +#ifndef _FLOW_H +#define _FLOW_H 1 + +// Code for computing optical flow between two images, and using it to interpolate +// in-between frames. The main user interface is the Interpolate class. + +#include +#include +#include +#include +#include +#include + +class ScopedTimer; + +// A class that caches FBOs that render to a given set of textures. +// It never frees anything, so it is only suitable for rendering to +// the same (small) set of textures over and over again. +template +class PersistentFBOSet { +public: + void render_to(const std::array &textures); + + // Convenience wrappers. + void render_to(GLuint texture0) { + render_to({{texture0}}); + } + + void render_to(GLuint texture0, GLuint texture1) { + render_to({{texture0, texture1}}); + } + + void render_to(GLuint texture0, GLuint texture1, GLuint texture2) { + render_to({{texture0, texture1, texture2}}); + } + + void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) { + render_to({{texture0, texture1, texture2, texture3}}); + } + +private: + // TODO: Delete these on destruction. + std::map, GLuint> fbos; +}; + + +// Same, but with a depth texture. +template +class PersistentFBOSetWithDepth { +public: + void render_to(GLuint depth_rb, const std::array &textures); + + // Convenience wrappers. + void render_to(GLuint depth_rb, GLuint texture0) { + render_to(depth_rb, {{texture0}}); + } + + void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1) { + render_to(depth_rb, {{texture0, texture1}}); + } + + void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2) { + render_to(depth_rb, {{texture0, texture1, texture2}}); + } + + void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) { + render_to(depth_rb, {{texture0, texture1, texture2, texture3}}); + } + +private: + // TODO: Delete these on destruction. + std::map>, GLuint> fbos; +}; + +// Convert RGB to grayscale, using Rec. 709 coefficients. +class GrayscaleConversion { +public: + GrayscaleConversion(); + void exec(GLint tex, GLint gray_tex, int width, int height, int num_layers); + +private: + PersistentFBOSet<1> fbos; + GLuint gray_vs_obj; + GLuint gray_fs_obj; + GLuint gray_program; + GLuint gray_vao; + + GLuint uniform_tex; +}; + +// Compute gradients in every point, used for the motion search. +// The DIS paper doesn't actually mention how these are computed, +// but seemingly, a 3x3 Sobel operator is used here (at least in +// later versions of the code), while a [1 -8 0 8 -1] kernel is +// used for all the derivatives in the variational refinement part +// (which borrows code from DeepFlow). This is inconsistent, +// but I guess we're better off with staying with the original +// decisions until we actually know having different ones would be better. +class Sobel { +public: + Sobel(); + void exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers); + +private: + PersistentFBOSet<1> fbos; + GLuint sobel_vs_obj; + GLuint sobel_fs_obj; + GLuint sobel_program; + + GLuint uniform_tex; +}; + +// Motion search to find the initial flow. See motion_search.frag for documentation. +class MotionSearch { +public: + MotionSearch(); + void exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers); + +private: + PersistentFBOSet<1> fbos; + + GLuint motion_vs_obj; + GLuint motion_fs_obj; + GLuint motion_search_program; + + GLuint uniform_inv_image_size, uniform_inv_prev_level_size, uniform_out_flow_size; + GLuint uniform_image_tex, uniform_grad_tex, uniform_flow_tex; +}; + +// Do “densification”, ie., upsampling of the flow patches to the flow field +// (the same size as the image at this level). We draw one quad per patch +// over its entire covered area (using instancing in the vertex shader), +// and then weight the contributions in the pixel shader by post-warp difference. +// This is equation (3) in the paper. +// +// We accumulate the flow vectors in the R/G channels (for u/v) and the total +// weight in the B channel. Dividing R and G by B gives the normalized values. +class Densify { +public: + Densify(); + void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers); + +private: + PersistentFBOSet<1> fbos; + + GLuint densify_vs_obj; + GLuint densify_fs_obj; + GLuint densify_program; + + GLuint uniform_patch_size; + GLuint uniform_image_tex, uniform_flow_tex; +}; + +// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of +// I_0 and I_w. The prewarping is what enables us to solve the variational +// flow for du,dv instead of u,v. +// +// Also calculates the normalized flow, ie. divides by z (this is needed because +// Densify works by additive blending) and multiplies by the image size. +// +// See variational_refinement.txt for more information. +class Prewarp { +public: + Prewarp(); + void exec(GLuint tex_view, GLuint flow_tex, GLuint normalized_flow_tex, GLuint I_tex, GLuint I_t_tex, int level_width, int level_height, int num_layers); + +private: + PersistentFBOSet<3> fbos; + + GLuint prewarp_vs_obj; + GLuint prewarp_fs_obj; + GLuint prewarp_program; + + GLuint uniform_image_tex, uniform_flow_tex; +}; + +// From I, calculate the partial derivatives I_x and I_y. We use a four-tap +// central difference filter, since apparently, that's tradition (I haven't +// measured quality versus a more normal 0.5 (I[x+1] - I[x-1]).) +// The coefficients come from +// +// https://en.wikipedia.org/wiki/Finite_difference_coefficient +// +// Also computes β_0, since it depends only on I_x and I_y. +class Derivatives { +public: + Derivatives(); + void exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers); + +private: + PersistentFBOSet<2> fbos; + + GLuint derivatives_vs_obj; + GLuint derivatives_fs_obj; + GLuint derivatives_program; + + GLuint uniform_tex; +}; + +// Calculate the diffusivity for each pixels, g(x,y). Smoothness (s) will +// be calculated in the shaders on-the-fly by sampling in-between two +// neighboring g(x,y) pixels, plus a border tweak to make sure we get +// zero smoothness at the border. +// +// See variational_refinement.txt for more information. +class ComputeDiffusivity { +public: + ComputeDiffusivity(); + void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers); + +private: + PersistentFBOSet<1> fbos; + + GLuint diffusivity_vs_obj; + GLuint diffusivity_fs_obj; + GLuint diffusivity_program; + + GLuint uniform_flow_tex, uniform_diff_flow_tex; + GLuint uniform_alpha, uniform_zero_diff_flow; +}; + +// Set up the equations set (two equations in two unknowns, per pixel). +// We store five floats; the three non-redundant elements of the 2x2 matrix (A) +// as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit +// floats. (Actually, we store the inverse of the diagonal elements, because +// we only ever need to divide by them.) This fits into four u32 values; +// R, G, B for the matrix (the last element is symmetric) and A for the two b values. +// All the values of the energy term (E_I, E_G, E_S), except the smoothness +// terms that depend on other pixels, are calculated in one pass. +// +// The equation set is split in two; one contains only the pixels needed for +// the red pass, and one only for the black pass (see sor.frag). This reduces +// the amount of data the SOR shader has to pull in, at the cost of some +// complexity when the equation texture ends up with half the size and we need +// to adjust texture coordinates. The contraction is done along the horizontal +// axis, so that on even rows (0, 2, 4, ...), the “red” texture will contain +// pixels 0, 2, 4, 6, etc., and on odd rows 1, 3, 5, etc.. +// +// See variational_refinement.txt for more information about the actual +// equations in use. +class SetupEquations { +public: + SetupEquations(); + void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers); + +private: + PersistentFBOSet<2> fbos; + + GLuint equations_vs_obj; + GLuint equations_fs_obj; + GLuint equations_program; + + GLuint uniform_I_x_y_tex, uniform_I_t_tex; + GLuint uniform_diff_flow_tex, uniform_base_flow_tex; + GLuint uniform_beta_0_tex; + GLuint uniform_diffusivity_tex; + GLuint uniform_gamma, uniform_delta, uniform_zero_diff_flow; +}; + +// Actually solve the equation sets made by SetupEquations, by means of +// successive over-relaxation (SOR). +// +// See variational_refinement.txt for more information. +class SOR { +public: + SOR(); + void exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer); + +private: + PersistentFBOSet<1> fbos; + + GLuint sor_vs_obj; + GLuint sor_fs_obj; + GLuint sor_program; + + GLuint uniform_diff_flow_tex; + GLuint uniform_equation_red_tex, uniform_equation_black_tex; + GLuint uniform_diffusivity_tex; + GLuint uniform_phase, uniform_num_nonzero_phases; +}; + +// Simply add the differential flow found by the variational refinement to the base flow. +// The output is in base_flow_tex; we don't need to make a new texture. +class AddBaseFlow { +public: + AddBaseFlow(); + void exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers); + +private: + PersistentFBOSet<1> fbos; + + GLuint add_flow_vs_obj; + GLuint add_flow_fs_obj; + GLuint add_flow_program; + + GLuint uniform_diff_flow_tex; +}; + +// Take a copy of the flow, bilinearly interpolated and scaled up. +class ResizeFlow { +public: + ResizeFlow(); + void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers); + +private: + PersistentFBOSet<1> fbos; + + GLuint resize_flow_vs_obj; + GLuint resize_flow_fs_obj; + GLuint resize_flow_program; + + GLuint uniform_flow_tex; + GLuint uniform_scale_factor; +}; + +class TexturePool { +public: + GLuint get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers = 0); + void release_texture(GLuint tex_num); + GLuint get_renderbuffer(GLenum format, GLuint width, GLuint height); + void release_renderbuffer(GLuint tex_num); + +private: + struct Texture { + GLuint tex_num; + GLenum format; + GLuint width, height, num_layers; + bool in_use = false; + bool is_renderbuffer = false; + }; + std::vector textures; +}; + +class DISComputeFlow { +public: + DISComputeFlow(int width, int height); + + enum FlowDirection { + FORWARD, + FORWARD_AND_BACKWARD + }; + enum ResizeStrategy { + DO_NOT_RESIZE_FLOW, + RESIZE_FLOW_TO_FULL_SIZE + }; + + // The texture must have two layers (first and second frame). + // Returns a texture that must be released with release_texture() + // after use. + GLuint exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy); + + void release_texture(GLuint tex) { + pool.release_texture(tex); + } + +private: + int width, height; + GLuint initial_flow_tex; + GLuint vertex_vbo, vao; + TexturePool pool; + + // The various passes. + Sobel sobel; + MotionSearch motion_search; + Densify densify; + Prewarp prewarp; + Derivatives derivatives; + ComputeDiffusivity compute_diffusivity; + SetupEquations setup_equations; + SOR sor; + AddBaseFlow add_base_flow; + ResizeFlow resize_flow; +}; + +// Forward-warp the flow half-way (or rather, by alpha). A non-zero “splatting” +// radius fills most of the holes. +class Splat { +public: + Splat(); + + // alpha is the time of the interpolated frame (0..1). + void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha); + +private: + PersistentFBOSetWithDepth<1> fbos; + + GLuint splat_vs_obj; + GLuint splat_fs_obj; + GLuint splat_program; + + GLuint uniform_splat_size, uniform_alpha; + GLuint uniform_image_tex, uniform_flow_tex; + GLuint uniform_inv_flow_size; +}; + +// Doing good and fast hole-filling on a GPU is nontrivial. We choose an option +// that's fairly simple (given that most holes are really small) and also hopefully +// cheap should the holes not be so small. Conceptually, we look for the first +// non-hole to the left of us (ie., shoot a ray until we hit something), then +// the first non-hole to the right of us, then up and down, and then average them +// all together. It's going to create “stars” if the holes are big, but OK, that's +// a tradeoff. +// +// Our implementation here is efficient assuming that the hierarchical Z-buffer is +// on even for shaders that do discard (this typically kills early Z, but hopefully +// not hierarchical Z); we set up Z so that only holes are written to, which means +// that as soon as a hole is filled, the rasterizer should just skip it. Most of the +// fullscreen quads should just be discarded outright, really. +class HoleFill { +public: + HoleFill(); + + // Output will be in flow_tex, temp_tex[0, 1, 2], representing the filling + // from the down, left, right and up, respectively. Use HoleBlend to merge + // them into one. + void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height); + +private: + PersistentFBOSetWithDepth<1> fbos; + + GLuint fill_vs_obj; + GLuint fill_fs_obj; + GLuint fill_program; + + GLuint uniform_tex; + GLuint uniform_z, uniform_sample_offset; +}; + +// Blend the four directions from HoleFill into one pixel, so that single-pixel +// holes become the average of their four neighbors. +class HoleBlend { +public: + HoleBlend(); + + void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height); + +private: + PersistentFBOSetWithDepth<1> fbos; + + GLuint blend_vs_obj; + GLuint blend_fs_obj; + GLuint blend_program; + + GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex; + GLuint uniform_z, uniform_sample_offset; +}; + +class Blend { +public: + Blend(); + void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha); + +private: + PersistentFBOSet<1> fbos; + GLuint blend_vs_obj; + GLuint blend_fs_obj; + GLuint blend_program; + + GLuint uniform_image_tex, uniform_flow_tex; + GLuint uniform_alpha, uniform_flow_consistency_tolerance; +}; + +class Interpolate { +public: + Interpolate(int width, int height, int flow_level); + + // Returns a texture that must be released with release_texture() + // after use. image_tex must be a two-layer RGBA8 texture with mipmaps + // (unless flow_level == 0). + GLuint exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha); + + void release_texture(GLuint tex) { + pool.release_texture(tex); + } + +private: + int width, height, flow_level; + GLuint vertex_vbo, vao; + TexturePool pool; + + Splat splat; + HoleFill hole_fill; + HoleBlend hole_blend; + Blend blend; +}; + +#endif // !defined(_FLOW_H) -- 2.39.2