+ glDisable(GL_DEPTH_TEST);
+}
+
+// Builds the hole-blend program and caches the locations of its uniforms.
+// The vertex stage is the same "hole_fill.vert" source that the fill pass
+// uses; only the fragment stage ("hole_blend.frag") is specific to this pass.
+HoleBlend::HoleBlend()
+{
+	const string vert_source = read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size);
+	blend_vs_obj = compile_shader(vert_source, GL_VERTEX_SHADER);
+
+	const string frag_source = read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size);
+	blend_fs_obj = compile_shader(frag_source, GL_FRAGMENT_SHADER);
+
+	blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+	// Look up every uniform once, so exec() never has to query by name.
+	const auto locate = [this](const char *name) {
+		return glGetUniformLocation(blend_program, name);
+	};
+	uniform_left_tex = locate("left_tex");
+	uniform_right_tex = locate("right_tex");
+	uniform_up_tex = locate("up_tex");
+	uniform_down_tex = locate("down_tex");
+	uniform_z = locate("z");
+	uniform_sample_offset = locate("sample_offset");
+}
+
+// Runs the hole-blend pass: draws one quad into flow_tex (with depth_rb
+// attached) while sampling the three temporaries and flow_tex itself.
+//
+//   flow_tex       texture that is both sampled (as "down_tex") and rendered to
+//   depth_rb       depth renderbuffer used together with the GL_LEQUAL test
+//   temp_tex       three textures bound as "left_tex", "right_tex", "up_tex"
+//   width, height  viewport size for the pass
+//
+// Depth testing is enabled only for the duration of the draw and is disabled
+// again before returning.
+void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
+{
+	glUseProgram(blend_program);
+
+	// Texture unit N receives sources[N], bound to the uniform at locations[N].
+	const decltype(uniform_left_tex) locations[4] = {
+		uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex
+	};
+	const GLuint sources[4] = { temp_tex[0], temp_tex[1], temp_tex[2], flow_tex };
+	for (int unit = 0; unit < 4; ++unit) {
+		bind_sampler(blend_program, locations[unit], unit, sources[unit], nearest_sampler);
+	}
+
+	const float z_value = 1.0f - 4.0f / 1024.0f;
+	glProgramUniform1f(blend_program, uniform_z, z_value);
+	glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+
+	// The depth test makes the draw skip every pixel that was never a hole
+	// in the first place.
+	glEnable(GL_DEPTH_TEST);
+	glDepthFunc(GL_LEQUAL);
+
+	// NOTE: flow_tex is read from and written to in the same pass.
+	fbos.render_to(depth_rb, flow_tex);
+
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	glDisable(GL_DEPTH_TEST);
+}
+
+// Builds the blend program from "vs.vert" and "blend.frag".
+//
+// When split_ycbcr_output is true, "#define SPLIT_YCBCR_OUTPUT 1" is spliced
+// into the fragment source directly after its first line (expected to be the
+// #version directive) before compilation. The flag is also stored in the
+// member of the same name.
+Blend::Blend(bool split_ycbcr_output)
+	: split_ycbcr_output(split_ycbcr_output)
+{
+	string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size);
+	if (split_ycbcr_output) {
+		// The define must come after the first (#version) line, so find
+		// where that line ends and insert right behind its newline.
+		const size_t first_newline = frag_shader.find('\n');
+		assert(first_newline != string::npos);
+		frag_shader.insert(first_newline + 1, "#define SPLIT_YCBCR_OUTPUT 1\n");
+	}
+
+	const string vert_shader = read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size);
+	blend_vs_obj = compile_shader(vert_shader, GL_VERTEX_SHADER);
+	blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
+	blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+	// Cache uniform locations for later use.
+	uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
+	uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
+	uniform_alpha = glGetUniformLocation(blend_program, "alpha");
+	uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
+}
+
+// Runs the blend pass: draws one quad that samples image_tex and flow_tex
+// (both with the linear sampler) and writes the result to the output
+// texture(s).
+//
+//   image_tex, flow_tex        inputs; flow_tex may be upsampled by the sampler
+//   output_tex                 render target; the only one used when the
+//                              object was built without split output
+//   output2_tex                second render target, used only when the
+//                              object was built with split_ycbcr_output
+//   level_width, level_height  viewport size for the pass
+//   alpha                      forwarded unchanged to the "alpha" uniform
+void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
+{
+	glUseProgram(blend_program);
+
+	bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
+	// May be upsampled.
+	bind_sampler(blend_program, uniform_flow_tex, 1, flow_tex, linear_sampler);
+	glProgramUniform1f(blend_program, uniform_alpha, alpha);
+
+	glViewport(0, 0, level_width, level_height);
+
+	// Pick the framebuffer set that matches the number of outputs.
+	if (!split_ycbcr_output) {
+		fbos.render_to(output_tex);
+	} else {
+		fbos_split.render_to(output_tex, output2_tex);
+	}
+
+	// A bit ironic, perhaps: the blend pass runs with GL blending off.
+	glDisable(GL_BLEND);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+// Sets up the interpolator: remembers which level to work on
+// (op.finest_level) and whether output is split, constructs the splat and
+// blend passes, and creates the vertex buffer and vertex array object.
+Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output)
+	: flow_level(op.finest_level),
+	  split_ycbcr_output(split_ycbcr_output),
+	  splat(op),
+	  blend(split_ycbcr_output)
+{
+	// Four 2D corners, in the order consumed by the GL_TRIANGLE_STRIP draws.
+	// This vertex data is shared between all passes.
+	float quad_corners[] = {
+		0.0f, 1.0f,
+		0.0f, 0.0f,
+		1.0f, 1.0f,
+		1.0f, 0.0f,
+	};
+	glCreateBuffers(1, &vertex_vbo);
+	glNamedBufferData(vertex_vbo, sizeof(quad_corners), quad_corners, GL_STATIC_DRAW);
+
+	// glVertexAttribPointer() operates on the currently bound VAO and
+	// array buffer, so both have to be bound before it is called.
+	glCreateVertexArrays(1, &vao);
+	glBindVertexArray(vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	// Attribute 0 is the position; that index is hard-coded in every
+	// vertex shader.
+	GLint position_index = 0;
+	glEnableVertexArrayAttrib(vao, position_index);
+	glVertexAttribPointer(position_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the interpolation pipeline: splat, hole fill, hole blend, and finally
+// the blend pass, with GPU timing around each stage.
+//
+//   image_tex               passed on to the blend pass as its image input
+//   gray_tex                a two-layer view of its level flow_level is handed
+//                           to the splat pass
+//   bidirectional_flow_tex  passed on to the splat pass
+//   width, height           size of the output texture(s); the flow textures
+//                           are this size shifted right by flow_level
+//   alpha                   forwarded to both the splat and the blend pass
+//
+// Returns (output_tex, output2_tex), both taken from the texture pool and not
+// released here. output2_tex is 0 unless split_ycbcr_output is set, in which
+// case output_tex is GL_R8 and output2_tex is GL_RG8; otherwise output_tex is
+// GL_RGBA8. Timings are printed unless in_warmup is set.
+pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
+{
+	GPUTimers timers;
+
+	ScopedTimer total_timer("Interpolate", &timers);
+
+	glBindVertexArray(vao);
+	glDisable(GL_DITHER);
+
+	// Pick out the right level to test splatting results on: a temporary
+	// view exposing only mip level flow_level, layers 0 and 1, of gray_tex.
+	GLuint level_view;
+	glGenTextures(1, &level_view);
+	glTextureView(level_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
+
+	int flow_width = width >> flow_level;
+	int flow_height = height >> flow_level;
+
+	GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
+	// The depth buffer is used for ranking flows.
+	GLuint depth_rb = pool.get_renderbuffer(GL_DEPTH_COMPONENT16, flow_width, flow_height);
+
+	{
+		ScopedTimer timer("Splat", &total_timer);
+		splat.exec(level_view, bidirectional_flow_tex, flow_tex, depth_rb, flow_width, flow_height, alpha);
+	}
+	glDeleteTextures(1, &level_view);
+
+	// Scratch textures for the hole-filling passes.
+	GLuint temp_tex[3];
+	for (GLuint &scratch : temp_tex) {
+		scratch = pool.get_texture(GL_RG16F, flow_width, flow_height);
+	}
+
+	{
+		ScopedTimer timer("Fill holes", &total_timer);
+		hole_fill.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+		hole_blend.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+	}
+
+	for (GLuint scratch : temp_tex) {
+		pool.release_texture(scratch);
+	}
+	pool.release_renderbuffer(depth_rb);
+
+	// Allocate the output(s). The second output stays 0 when it is unused,
+	// so the blend call below is the same for both configurations.
+	GLuint output_tex, output2_tex = 0;
+	if (split_ycbcr_output) {
+		output_tex = pool.get_texture(GL_R8, width, height);
+		output2_tex = pool.get_texture(GL_RG8, width, height);
+	} else {
+		output_tex = pool.get_texture(GL_RGBA8, width, height);
+	}
+	{
+		ScopedTimer timer("Blend", &total_timer);
+		blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
+	}
+
+	pool.release_texture(flow_tex);
+	total_timer.end();
+	if (!in_warmup) {
+		timers.print();
+	}
+
+	return make_pair(output_tex, output2_tex);
+}
+
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
+{
+ {
+ lock_guard<mutex> lock(mu);
+ for (Texture &tex : textures) {
+ if (!tex.in_use && !tex.is_renderbuffer && tex.format == format &&
+ tex.width == width && tex.height == height && tex.num_layers == num_layers) {
+ tex.in_use = true;
+ return tex.tex_num;
+ }