+ glDisable(GL_DEPTH_TEST);
+}
+
+// Blend the four directions from HoleFill into one pixel, so that single-pixel
+// holes become the average of their four neighbors.
+class HoleBlend {
+public:
+ HoleBlend();
+
+ void exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height);
+
+private:
+ PersistentFBOSetWithDepth<1> fbos;
+
+ GLuint blend_vs_obj;
+ GLuint blend_fs_obj;
+ GLuint blend_program;
+
+ GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex;
+ GLuint uniform_z, uniform_sample_offset;
+};
+
+HoleBlend::HoleBlend()
+{
+ blend_vs_obj = compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER); // Reuse the vertex shader from the fill.
+ blend_fs_obj = compile_shader(read_file("hole_blend.frag"), GL_FRAGMENT_SHADER);
+ blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+ uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
+ uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
+ uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
+ uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
+ uniform_z = glGetUniformLocation(blend_program, "z");
+ uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
+}
+
+void HoleBlend::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height)
+{
+ glUseProgram(blend_program);
+
+ bind_sampler(blend_program, uniform_left_tex, 0, temp_tex[0], nearest_sampler);
+ bind_sampler(blend_program, uniform_right_tex, 1, temp_tex[1], nearest_sampler);
+ bind_sampler(blend_program, uniform_up_tex, 2, temp_tex[2], nearest_sampler);
+ bind_sampler(blend_program, uniform_down_tex, 3, flow_tex, nearest_sampler);
+
+ glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+ glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
+
+ glViewport(0, 0, width, height);
+ glDisable(GL_BLEND);
+ glEnable(GL_DEPTH_TEST);
+ glDepthFunc(GL_LEQUAL); // Skip over all of the pixels that were never holes to begin with.
+
+ fbos.render_to(depth_tex, flow_tex); // NOTE: Reading and writing to the same texture.
+
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ glDisable(GL_DEPTH_TEST);
+}
+
+class Blend {
+public:
+ Blend();
+ void exec(GLuint tex0, GLuint tex1, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha);
+
+private:
+ PersistentFBOSet<1> fbos;
+ GLuint blend_vs_obj;
+ GLuint blend_fs_obj;
+ GLuint blend_program;
+
+ GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
+ GLuint uniform_alpha, uniform_flow_consistency_tolerance;
+};
+
+Blend::Blend()
+{
+ blend_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+ blend_fs_obj = compile_shader(read_file("blend.frag"), GL_FRAGMENT_SHADER);
+ blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+ uniform_image0_tex = glGetUniformLocation(blend_program, "image0_tex");
+ uniform_image1_tex = glGetUniformLocation(blend_program, "image1_tex");
+ uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
+ uniform_alpha = glGetUniformLocation(blend_program, "alpha");
+ uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
+}
+
+void Blend::exec(GLuint tex0, GLuint tex1, GLuint flow_tex, GLuint output_tex, int level_width, int level_height, float alpha)
+{
+ glUseProgram(blend_program);
+ bind_sampler(blend_program, uniform_image0_tex, 0, tex0, linear_sampler);
+ bind_sampler(blend_program, uniform_image1_tex, 1, tex1, linear_sampler);
+ bind_sampler(blend_program, uniform_flow_tex, 2, flow_tex, linear_sampler); // May be upsampled.
+ glProgramUniform1f(blend_program, uniform_alpha, alpha);
+
+ glViewport(0, 0, level_width, level_height);
+ fbos.render_to(output_tex);
+ glUseProgram(blend_program);
+ glDisable(GL_BLEND); // A bit ironic, perhaps.
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+class Interpolate {
+public:
+ Interpolate(int width, int height, int flow_level);
+
+ // Returns a texture that must be released with release_texture()
+ // after use. tex0 and tex1 must be RGBA8 textures with mipmaps
+ // (unless flow_level == 0).
+ GLuint exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint width, GLuint height, float alpha);
+
+ void release_texture(GLuint tex) {
+ pool.release_texture(tex);
+ }
+
+private:
+ int width, height, flow_level;
+ GLuint vertex_vbo, vao;
+ TexturePool pool;
+
+ Splat splat;
+ HoleFill hole_fill;
+ HoleBlend hole_blend;
+ Blend blend;
+};
+
+Interpolate::Interpolate(int width, int height, int flow_level)
+ : width(width), height(height), flow_level(flow_level) {
+ // Set up the vertex data that will be shared between all passes.
+ float vertices[] = {
+ 0.0f, 1.0f,
+ 0.0f, 0.0f,
+ 1.0f, 1.0f,
+ 1.0f, 0.0f,
+ };
+ glCreateBuffers(1, &vertex_vbo);
+ glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+ glCreateVertexArrays(1, &vao);
+ glBindVertexArray(vao);
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+ GLint position_attrib = 0; // Hard-coded in every vertex shader.
+ glEnableVertexArrayAttrib(vao, position_attrib);
+ glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint width, GLuint height, float alpha)
+{
+ GPUTimers timers;
+
+ ScopedTimer total_timer("Total", &timers);
+
+ glBindVertexArray(vao);
+
+ // Pick out the right level to test splatting results on.
+ GLuint tex0_view, tex1_view;
+ glGenTextures(1, &tex0_view);
+ glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_RGBA8, flow_level, 1, 0, 1);
+ glGenTextures(1, &tex1_view);
+ glTextureView(tex1_view, GL_TEXTURE_2D, tex1, GL_RGBA8, flow_level, 1, 0, 1);
+
+ int flow_width = width >> flow_level;
+ int flow_height = height >> flow_level;
+
+ GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
+ GLuint depth_tex = pool.get_texture(GL_DEPTH_COMPONENT32F, flow_width, flow_height); // Used for ranking flows.
+
+ {
+ ScopedTimer timer("Splat", &total_timer);
+ splat.exec(tex0_view, tex1_view, forward_flow_tex, backward_flow_tex, flow_tex, depth_tex, flow_width, flow_height, alpha);
+ }
+ glDeleteTextures(1, &tex0_view);
+ glDeleteTextures(1, &tex1_view);
+
+ GLuint temp_tex[3];
+ temp_tex[0] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+ temp_tex[1] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+ temp_tex[2] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+
+ {
+ ScopedTimer timer("Fill holes", &total_timer);
+ hole_fill.exec(flow_tex, depth_tex, temp_tex, flow_width, flow_height);
+ hole_blend.exec(flow_tex, depth_tex, temp_tex, flow_width, flow_height);
+ }
+
+ pool.release_texture(temp_tex[0]);
+ pool.release_texture(temp_tex[1]);
+ pool.release_texture(temp_tex[2]);
+ pool.release_texture(depth_tex);
+
+ GLuint output_tex = pool.get_texture(GL_RGBA8, width, height);
+ {
+ ScopedTimer timer("Blend", &total_timer);
+ blend.exec(tex0, tex1, flow_tex, output_tex, width, height, alpha);
+ }
+ pool.release_texture(flow_tex);
+ total_timer.end();
+ timers.print();
+
+ return output_tex;
+}
+
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height)
+{
+ for (Texture &tex : textures) {
+ if (!tex.in_use && tex.format == format &&
+ tex.width == width && tex.height == height) {
+ tex.in_use = true;
+ return tex.tex_num;
+ }
+ }
+
+ Texture tex;
+ glCreateTextures(GL_TEXTURE_2D, 1, &tex.tex_num);
+ glTextureStorage2D(tex.tex_num, 1, format, width, height);
+ tex.format = format;
+ tex.width = width;
+ tex.height = height;
+ tex.in_use = true;
+ textures.push_back(tex);
+ return tex.tex_num;
+}
+
+void TexturePool::release_texture(GLuint tex_num)
+{
+ for (Texture &tex : textures) {
+ if (tex.tex_num == tex_num) {
+ assert(tex.in_use);
+ tex.in_use = false;
+ return;
+ }
+ }
+ assert(false);
+}
+
+// OpenGL uses a bottom-left coordinate system, .flo files use a top-left coordinate system.
+void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
+{
+ for (unsigned i = 0; i < width * height; ++i) {
+ dense_flow[i * 2 + 1] = -dense_flow[i * 2 + 1];
+ }
+}
+
+// Not relevant for RGB.
+void flip_coordinate_system(uint8_t *dense_flow, unsigned width, unsigned height)
+{
+}
+
+void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+ FILE *flowfp = fopen(filename, "wb");
+ fprintf(flowfp, "FEIH");
+ fwrite(&width, 4, 1, flowfp);
+ fwrite(&height, 4, 1, flowfp);
+ for (unsigned y = 0; y < height; ++y) {
+ int yy = height - y - 1;
+ fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
+ }
+ fclose(flowfp);
+}
+
+// Not relevant for RGB.
+void write_flow(const char *filename, const uint8_t *dense_flow, unsigned width, unsigned height)
+{
+ assert(false);
+}
+
+void write_ppm(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+ FILE *fp = fopen(filename, "wb");
+ fprintf(fp, "P6\n%d %d\n255\n", width, height);
+ for (unsigned y = 0; y < unsigned(height); ++y) {
+ int yy = height - y - 1;
+ for (unsigned x = 0; x < unsigned(width); ++x) {
+ float du = dense_flow[(yy * width + x) * 2 + 0];
+ float dv = dense_flow[(yy * width + x) * 2 + 1];