+ // Set up the 2x2 equation system for each pixel.
+ {
+ ScopedTimer timer("Set up equations", &varref_timer);
+ setup_equations.exec(I_x_y_tex, I_t_tex, du_dv_tex, base_flow_tex, beta_0_tex, smoothness_x_tex, smoothness_y_tex, equation_tex, level_width, level_height);
+ }
+
+ // Run a few SOR (or quasi-SOR, since we're not really Jacobi) iterations.
+ // Note that these are to/from the same texture.
+ {
+ ScopedTimer timer("SOR", &varref_timer);
+ sor.exec(du_dv_tex, equation_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, 5);
+ }
+ }
+
+ pool.release_texture(I_t_tex);
+ pool.release_texture(I_x_y_tex);
+ pool.release_texture(beta_0_tex);
+ pool.release_texture(smoothness_x_tex);
+ pool.release_texture(smoothness_y_tex);
+ pool.release_texture(equation_tex);
+
+ // Add the differential flow found by the variational refinement to the base flow,
+ // giving the final flow estimate for this level.
+ // The output is in diff_flow_tex; we don't need to make a new texture.
+ //
+ // Disabling this doesn't save any time (although we could easily make it so that
+ // it is more efficient), but it helps debug the motion search.
+ if (enable_variational_refinement) {
+ ScopedTimer timer("Add differential flow", &varref_timer);
+ add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height);
+ }
+ pool.release_texture(du_dv_tex);
+
+ if (prev_level_flow_tex != initial_flow_tex) {
+ pool.release_texture(prev_level_flow_tex);
+ }
+ prev_level_flow_tex = base_flow_tex;
+ prev_level_width = level_width;
+ prev_level_height = level_height;
+ }
+ total_timer.end();
+
+ timers.print();
+
+ // Scale up the flow to the final size (if needed).
+ if (finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
+ return prev_level_flow_tex;
+ } else {
+ GLuint final_tex = pool.get_texture(GL_RG16F, width, height);
+ resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height);
+ pool.release_texture(prev_level_flow_tex);
+ return final_tex;
+ }
+}
+
+ // Forward-warp the flow half-way (or rather, by alpha). A non-zero “splatting”
+ // radius fills most of the holes.
+ //
+ // exec() draws the forward flow and then the backward flow into the same
+ // target with depth testing enabled, so the two passes compete per pixel.
+ class Splat {
+ public:
+     // Compiles/links splat.vert + splat.frag and sets up the VAO and uniform locations.
+     Splat();
+
+     // Splats forward_flow_tex and then backward_flow_tex into flow_tex, with
+     // depth_tex attached as the depth buffer. tex0 and tex1 are bound as the
+     // image0_tex/image1_tex samplers. width and height give the viewport size
+     // and (as width * height) the number of instances drawn per pass.
+     // alpha is the time of the interpolated frame (0..1).
+     void exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint flow_tex, GLuint depth_tex, int width, int height, float alpha);
+
+ private:
+     PersistentFBOSet<2> fbos;
+
+     GLuint splat_vs_obj;
+     GLuint splat_fs_obj;
+     GLuint splat_program;
+     GLuint splat_vao;
+
+     // Uniform locations are signed: glGetUniformLocation() returns a GLint,
+     // with -1 meaning that the uniform is not active in the program.
+     GLint uniform_invert_flow, uniform_splat_size, uniform_alpha;
+     GLint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
+     GLint uniform_inv_flow_size;
+ };
+
+ // Builds the splat program from splat.vert/splat.frag, creates the VAO that
+ // sources the "position" attribute from vertex_vbo, and looks up every
+ // uniform location used by exec().
+ Splat::Splat()
+ {
+     splat_vs_obj = compile_shader(read_file("splat.vert"), GL_VERTEX_SHADER);
+     splat_fs_obj = compile_shader(read_file("splat.frag"), GL_FRAGMENT_SHADER);
+     splat_program = link_program(splat_vs_obj, splat_fs_obj);
+
+     // The VAO carries all the position/texcoord data the program needs.
+     glCreateVertexArrays(1, &splat_vao);
+     glBindVertexArray(splat_vao);
+     glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+     const GLint pos_loc = glGetAttribLocation(splat_program, "position");
+     glEnableVertexArrayAttrib(splat_vao, pos_loc);
+     glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+     // All uniforms live in the same program, so share the lookup.
+     const auto locate = [this](const char *uniform_name) {
+         return glGetUniformLocation(splat_program, uniform_name);
+     };
+     uniform_invert_flow = locate("invert_flow");
+     uniform_splat_size = locate("splat_size");
+     uniform_alpha = locate("alpha");
+     uniform_image0_tex = locate("image0_tex");
+     uniform_image1_tex = locate("image1_tex");
+     uniform_flow_tex = locate("flow_tex");
+     uniform_inv_flow_size = locate("inv_flow_size");
+ }
+
+ // Runs the two splatting passes (forward flow, then backward flow) into
+ // flow_tex, using depth_tex as the depth attachment. Each pass draws
+ // width * height instances of a four-vertex triangle strip.
+ void Splat::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint flow_tex, GLuint depth_tex, int width, int height, float alpha)
+ {
+     glUseProgram(splat_program);
+
+     bind_sampler(splat_program, uniform_image0_tex, 0, tex0, linear_sampler);
+     bind_sampler(splat_program, uniform_image1_tex, 1, tex1, linear_sampler);
+
+     // FIXME: This is set to 1.0 right now so not to trigger Haswell's “PMA stall”.
+     // Move to 2.0 later.
+     // (A 4x4 splat means 16x overdraw, a 2x2 splat means 4x overdraw.)
+     const float splat_size = 1.0f;
+     glProgramUniform2f(splat_program, uniform_splat_size, splat_size / width, splat_size / height);
+     glProgramUniform1f(splat_program, uniform_alpha, alpha);
+     glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
+
+     glViewport(0, 0, width, height);
+     glDisable(GL_BLEND);
+     glEnable(GL_DEPTH_TEST);
+     // We store the difference between I_0 and I_1, where less difference is good.
+     // (Default 1.0 is effectively +inf, which always loses.)
+     glDepthFunc(GL_LESS);
+     glBindVertexArray(splat_vao);
+
+     // FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
+     GLuint splat_fbo;
+     glCreateFramebuffers(1, &splat_fbo);
+     glNamedFramebufferTexture(splat_fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);
+     glNamedFramebufferTexture(splat_fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
+     glBindFramebuffer(GL_FRAMEBUFFER, splat_fbo);
+
+     // One pass: select the flow field, tell the shader whether to invert it,
+     // and draw one instance per flow pixel.
+     const auto splat_pass = [&](GLuint pass_flow_tex, GLint invert_flow) {
+         bind_sampler(splat_program, uniform_flow_tex, 2, pass_flow_tex, nearest_sampler);
+         glProgramUniform1i(splat_program, uniform_invert_flow, invert_flow);
+         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height);
+     };
+     splat_pass(forward_flow_tex, 0);   // Forward splatting.
+     splat_pass(backward_flow_tex, 1);  // Backward splatting.
+
+     glDisable(GL_DEPTH_TEST);
+
+     glDeleteFramebuffers(1, &splat_fbo);
+ }
+
+ // Full-screen pass (vs.vert + blend.frag) that renders an output texture
+ // from two input images and a flow texture.
+ class Blend {
+ public:
+     // Compiles/links the shaders and sets up the VAO and uniform locations.
+     Blend();
+
+     // Renders into output_tex with a width x height viewport. tex0, tex1 and
+     // flow_tex are bound as the image0_tex, image1_tex and flow_tex samplers,
+     // and alpha is uploaded as the "alpha" uniform.
+     void exec(GLuint tex0, GLuint tex1, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha);
+
+ private:
+     PersistentFBOSet<1> fbos;
+     GLuint blend_vs_obj;
+     GLuint blend_fs_obj;
+     GLuint blend_program;
+     GLuint blend_vao;
+
+     // Uniform locations are signed: glGetUniformLocation() returns a GLint,
+     // with -1 meaning that the uniform is not active in the program.
+     GLint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
+     GLint uniform_alpha, uniform_flow_consistency_tolerance;
+ };
+
+ // Builds the blend program from vs.vert/blend.frag, creates the VAO that
+ // sources the "position" attribute from vertex_vbo, and looks up the
+ // uniform locations.
+ Blend::Blend()
+ {
+     blend_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+     blend_fs_obj = compile_shader(read_file("blend.frag"), GL_FRAGMENT_SHADER);
+     blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+     // Set up the VAO containing all the required position/texcoord data.
+     glCreateVertexArrays(1, &blend_vao);
+     glBindVertexArray(blend_vao);
+     // glVertexAttribPointer() records whichever buffer is bound to
+     // GL_ARRAY_BUFFER at call time, so bind the vertex buffer explicitly
+     // instead of relying on a binding left behind by some other constructor.
+     glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+     GLint position_attrib = glGetAttribLocation(blend_program, "position");
+     glEnableVertexArrayAttrib(blend_vao, position_attrib);
+     glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+     uniform_image0_tex = glGetUniformLocation(blend_program, "image0_tex");
+     uniform_image1_tex = glGetUniformLocation(blend_program, "image1_tex");
+     uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
+     uniform_alpha = glGetUniformLocation(blend_program, "alpha");
+     uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
+ }
+
+ // Renders the blend pass into output_tex: binds the two images and the flow
+ // as samplers, uploads alpha, and draws a four-vertex triangle strip over a
+ // level_width x level_height viewport.
+ void Blend::exec(GLuint tex0, GLuint tex1, GLuint flow_tex, GLuint output_tex, int level_width, int level_height, float alpha)
+ {
+     const GLuint program = blend_program;
+     glUseProgram(program);
+
+     // Inputs.
+     bind_sampler(program, uniform_image0_tex, 0, tex0, linear_sampler);
+     bind_sampler(program, uniform_image1_tex, 1, tex1, linear_sampler);
+     bind_sampler(program, uniform_flow_tex, 2, flow_tex, linear_sampler);  // May be upsampled.
+     glProgramUniform1f(program, uniform_alpha, alpha);
+     // NOTE: flow_consistency_tolerance is not uploaded here; a
+     // glProgramUniform1f() call for it used to sit at this spot, but it was
+     // commented out and left incomplete.
+
+     // Output and draw state.
+     glViewport(0, 0, level_width, level_height);
+     fbos.render_to(output_tex);
+     glBindVertexArray(blend_vao);
+     glUseProgram(program);
+     glDisable(GL_BLEND);  // A bit ironic, perhaps.
+     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+ }
+
+ // Ties together a texture pool, the Splat pass and the Blend pass to
+ // produce an interpolated frame from two images and their flow fields.
+ class Interpolate {
+ public:
+     Interpolate(int width, int height, int flow_level);
+
+     // The returned texture belongs to the internal pool and must be handed
+     // back with release_texture() once the caller is done with it.
+     // tex0 and tex1 must be RGBA8 textures with mipmaps (unless flow_level == 0).
+     GLuint exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint width, GLuint height, float alpha);
+
+     // Gives a texture obtained from exec() back to the pool.
+     void release_texture(GLuint tex)
+     {
+         pool.release_texture(tex);
+     }
+
+ private:
+     int width;
+     int height;
+     int flow_level;
+     TexturePool pool;
+     Splat splat;
+     Blend blend;
+ };
+
+ // Records the frame dimensions and the flow level; the pool, splat and
+ // blend members are default-constructed.
+ Interpolate::Interpolate(int width, int height, int flow_level)
+     : width(width),
+       height(height),
+       flow_level(flow_level)
+ {
+ }
+
+ // Produces an interpolated frame at time alpha.
+ //
+ // The forward and backward flows are splatted into a temporary flow texture
+ // at mip level flow_level of the inputs, and the result is then used by the
+ // blend pass to render a width x height GL_RGB8 output texture.
+ //
+ // Note that the width and height parameters shadow the members of the same
+ // name; only the parameters are used here.
+ //
+ // Returns a pooled texture that the caller must hand back through
+ // release_texture(). All intermediate textures are released before returning.
+ GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint width, GLuint height, float alpha)
+ {
+     GPUTimers timers;
+
+     ScopedTimer total_timer("Total", &timers);
+
+     // Pick out the right level to test splatting results on.
+     GLuint tex0_view, tex1_view;
+     glGenTextures(1, &tex0_view);
+     glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_RGBA8, flow_level, 1, 0, 1);
+     glGenTextures(1, &tex1_view);
+     glTextureView(tex1_view, GL_TEXTURE_2D, tex1, GL_RGBA8, flow_level, 1, 0, 1);
+
+     int flow_width = width >> flow_level;
+     int flow_height = height >> flow_level;
+
+     GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
+     GLuint depth_tex = pool.get_texture(GL_DEPTH_COMPONENT32F, flow_width, flow_height);  // Used for ranking flows.
+     {
+         ScopedTimer timer("Clear", &total_timer);
+         // A null data pointer clears the texture to zero.
+         glClearTexImage(flow_tex, 0, GL_RG, GL_FLOAT, nullptr);
+         float infinity = 1.0f;
+         glClearTexImage(depth_tex, 0, GL_DEPTH_COMPONENT, GL_FLOAT, &infinity);
+     }
+
+     //SDL_GL_SwapWindow(window);
+     {
+         ScopedTimer timer("Splat", &total_timer);
+         splat.exec(tex0_view, tex1_view, forward_flow_tex, backward_flow_tex, flow_tex, depth_tex, flow_width, flow_height, alpha);
+     }
+     //SDL_GL_SwapWindow(window);
+     pool.release_texture(depth_tex);
+     glDeleteTextures(1, &tex0_view);
+     glDeleteTextures(1, &tex1_view);
+
+     GLuint output_tex = pool.get_texture(GL_RGB8, width, height);
+     {
+         ScopedTimer timer("Blend", &total_timer);
+         blend.exec(tex0, tex1, flow_tex, output_tex, width, height, alpha);
+     }
+     // The splatted flow is only an intermediate; give it back to the pool.
+     // Otherwise it stays marked as in use forever, and every call would make
+     // the pool allocate yet another flow texture.
+     pool.release_texture(flow_tex);
+
+     total_timer.end();
+     timers.print();
+
+     return output_tex;
+ }
+
+ // Returns a texture with the given format and size, marked as in use.
+ // An idle pooled texture with exactly matching parameters is reused if one
+ // exists; otherwise a new single-level 2D texture is created and added to
+ // the pool.
+ GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height)
+ {
+     // First try to recycle an idle texture.
+     for (Texture &candidate : textures) {
+         if (candidate.in_use) {
+             continue;
+         }
+         if (candidate.format != format || candidate.width != width || candidate.height != height) {
+             continue;
+         }
+         candidate.in_use = true;
+         return candidate.tex_num;
+     }
+
+     // Nothing suitable was free; allocate a fresh one and remember it.
+     Texture fresh;
+     glCreateTextures(GL_TEXTURE_2D, 1, &fresh.tex_num);
+     glTextureStorage2D(fresh.tex_num, 1, format, width, height);
+     fresh.format = format;
+     fresh.width = width;
+     fresh.height = height;
+     fresh.in_use = true;
+     textures.push_back(fresh);
+     return fresh.tex_num;
+ }
+
+ // Marks the pooled texture tex_num as free for reuse. Asserts that the
+ // texture is currently in use, and asserts (fails) if tex_num is not in the
+ // pool at all.
+ void TexturePool::release_texture(GLuint tex_num)
+ {
+     for (Texture &entry : textures) {
+         if (entry.tex_num != tex_num) {
+             continue;
+         }
+         assert(entry.in_use);
+         entry.in_use = false;
+         return;
+     }
+     // Unknown texture number.
+     assert(false);
+ }