+// Doing good and fast hole-filling on a GPU is nontrivial. We choose an option
+// that's fairly simple (given that most holes are really small) and also hopefully
+// cheap should the holes not be so small. Conceptually, we look for the first
+// non-hole to the left of us (i.e., shoot a ray until we hit something), then
+// the first non-hole to the right of us, then up and down, and then average them
+// all together. It's going to create “stars” if the holes are big, but OK, that's
+// a tradeoff.
+//
+// Our implementation here is efficient assuming that the hierarchical Z-buffer is
+// on even for shaders that do discard (this typically kills early Z, but hopefully
+// not hierarchical Z); we set up Z so that only holes are written to, which means
+// that as soon as a hole is filled, the rasterizer should just skip it. Most of the
+// fullscreen quads should just be discarded outright, really.
+class HoleFill {
+public:
+	// Builds the fill program from hole_fill.vert / hole_fill.frag and sets up
+	// the VAO and uniform locations that exec() uses.
+	HoleFill();
+
+	// Runs the four directional fill passes (left, right, up, down), rendering
+	// into flow_tex with depth_tex as the depth attachment.
+	//
+	// On return, the per-direction results are found in:
+	//   temp_tex[0] - fill from the left
+	//   temp_tex[1] - fill from the right
+	//   temp_tex[2] - fill from up
+	//   flow_tex    - fill from down
+	// Use HoleBlend to merge them into one.
+	//
+	// width and height give the viewport size and the size of the region
+	// copied into each temp_tex entry.
+	void exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height);
+
+private:
+	// NOTE(review): neither the constructor nor exec() touches this yet;
+	// exec() still creates and deletes its own FBO on every call.
+	PersistentFBOSet<2> fbos;
+
+	// Shader objects and the program linked from them.
+	GLuint fill_vs_obj;
+	GLuint fill_fs_obj;
+	GLuint fill_program;
+
+	// VAO holding the "position" attribute setup.
+	GLuint fill_vao;
+
+	// Uniform locations in fill_program.
+	// NOTE(review): glGetUniformLocation() returns a GLint; these are stored
+	// as GLuint -- confirm that is intended.
+	GLuint uniform_tex;
+	GLuint uniform_z;
+	GLuint uniform_sample_offset;
+};
+
+// Compiles and links the hole-fill program, looks up its uniforms, and
+// creates the VAO used to draw with it.
+HoleFill::HoleFill()
+	// Initialization follows member declaration order, so both shader
+	// objects exist before they are handed to link_program().
+	: fill_vs_obj(compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER)),
+	  fill_fs_obj(compile_shader(read_file("hole_fill.frag"), GL_FRAGMENT_SHADER)),
+	  fill_program(link_program(fill_vs_obj, fill_fs_obj))
+{
+	// Cache the uniform locations so exec() does not have to look them up.
+	uniform_tex = glGetUniformLocation(fill_program, "tex");
+	uniform_z = glGetUniformLocation(fill_program, "z");
+	uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset");
+
+	// Create the VAO and leave it (and vertex_vbo) bound, so that the
+	// attribute pointer set below is recorded against this VAO.
+	glCreateVertexArrays(1, &fill_vao);
+	glBindVertexArray(fill_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	// "position": two tightly packed floats per vertex, starting at the
+	// beginning of the buffer.
+	const GLint position_location = glGetAttribLocation(fill_program, "position");
+	glEnableVertexArrayAttrib(fill_vao, position_location);
+	glVertexAttribPointer(position_location, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Fills holes in flow_tex in four directional passes: left, right, up, down.
+//
+// flow_tex is both sampled and rendered to; depth_tex is attached as the
+// depth buffer. After each of the first three passes, flow_tex is copied
+// into temp_tex[0], temp_tex[1] and temp_tex[2] respectively; the result of
+// the last pass (down) stays in flow_tex. width and height are used for the
+// viewport, the sample offsets and the copy size.
+void HoleFill::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height)
+{
+	glUseProgram(fill_program);
+	bind_sampler(fill_program, uniform_tex, 0, flow_tex, nearest_sampler);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+	glEnable(GL_DEPTH_TEST);
+	glDepthFunc(GL_LESS);  // Only update the values > 0.999f (i.e., only invalid pixels).
+	glBindVertexArray(fill_vao);
+
+	// FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
+	GLuint fbo;
+	glCreateFramebuffers(1, &fbo);
+	glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);  // NOTE: Reading and writing to the same texture.
+	glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
+	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+	// One entry per pass: the texture size along the sweep axis, and the
+	// sign of the sample offset in x and y.
+	struct Sweep {
+		int extent;
+		float dir_x;
+		float dir_y;
+	};
+	const Sweep sweeps[4] = {
+		{ width, -1.0f, 0.0f },   // Left: fill by shifting 1, 2, 4, 8, etc. pixels to the right.
+		{ width, 1.0f, 0.0f },    // Right.
+		{ height, 0.0f, -1.0f },  // Up.
+		{ height, 0.0f, 1.0f },   // Down.
+	};
+
+	for (int pass = 0; pass < 4; ++pass) {
+		const Sweep &sweep = sweeps[pass];
+
+		// Each pass draws at a slightly lower Z than the previous one
+		// (1 - 1/1024, 1 - 2/1024, ...), so that it re-fills the pixels
+		// that the previous pass wrote.
+		glProgramUniform1f(fill_program, uniform_z, 1.0f - float(pass + 1) / 1024.0f);
+
+		// Double the sample distance on every draw until it reaches the
+		// texture size along the sweep axis.
+		for (int step = 1; step < sweep.extent; step *= 2) {
+			const float delta = step / float(sweep.extent);
+			glProgramUniform2f(fill_program, uniform_sample_offset, sweep.dir_x * delta, sweep.dir_y * delta);
+			glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+			// We sample from the texture we are rendering to, so a
+			// barrier is issued before the next draw.
+			glTextureBarrier();
+		}
+
+		// Keep a snapshot of the first three passes; the result of the
+		// last one simply stays in flow_tex.
+		if (pass < 3) {
+			glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[pass], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+		}
+	}
+
+	glDisable(GL_DEPTH_TEST);
+
+	glDeleteFramebuffers(1, &fbo);
+}
+
+// Blend the four directions from HoleFill into one pixel, so that single-pixel
+// holes become the average of their four neighbors.
+class HoleBlend {
+public:
+	// Builds the blend program from hole_fill.vert / hole_blend.frag and sets
+	// up the VAO and uniform locations that exec() uses.
+	HoleBlend();
+
+	// Draws one fullscreen pass into flow_tex (with depth_tex as the depth
+	// attachment), sampling temp_tex[0], temp_tex[1], temp_tex[2] and
+	// flow_tex as the left, right, up and down inputs, respectively.
+	// width and height give the viewport size.
+	void exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height);
+
+private:
+	// NOTE(review): neither the constructor nor exec() touches this yet;
+	// exec() still creates and deletes its own FBO on every call.
+	PersistentFBOSet<2> fbos;
+
+	// Shader objects and the program linked from them.
+	GLuint blend_vs_obj;
+	GLuint blend_fs_obj;
+	GLuint blend_program;
+
+	// VAO holding the "position" attribute setup.
+	GLuint blend_vao;
+
+	// Uniform locations in blend_program, one sampler per direction.
+	// NOTE(review): glGetUniformLocation() returns a GLint; these are stored
+	// as GLuint -- confirm that is intended.
+	GLuint uniform_left_tex;
+	GLuint uniform_right_tex;
+	GLuint uniform_up_tex;
+	GLuint uniform_down_tex;
+
+	// Remaining uniform locations in blend_program.
+	GLuint uniform_z;
+	GLuint uniform_sample_offset;
+};
+
+// Compiles and links the hole-blend program, looks up its uniforms, and
+// creates the VAO used to draw with it.
+HoleBlend::HoleBlend()
+	// Initialization follows member declaration order, so both shader
+	// objects exist before they are handed to link_program().
+	: blend_vs_obj(compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER)),  // Reuse the vertex shader from the fill.
+	  blend_fs_obj(compile_shader(read_file("hole_blend.frag"), GL_FRAGMENT_SHADER)),
+	  blend_program(link_program(blend_vs_obj, blend_fs_obj))
+{
+	// Cache the uniform locations so exec() does not have to look them up.
+	uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
+	uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
+	uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
+	uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
+	uniform_z = glGetUniformLocation(blend_program, "z");
+	uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
+
+	// Create the VAO and leave it (and vertex_vbo) bound, so that the
+	// attribute pointer set below is recorded against this VAO.
+	glCreateVertexArrays(1, &blend_vao);
+	glBindVertexArray(blend_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	// "position": two tightly packed floats per vertex, starting at the
+	// beginning of the buffer.
+	const GLint position_location = glGetAttribLocation(blend_program, "position");
+	glEnableVertexArrayAttrib(blend_vao, position_location);
+	glVertexAttribPointer(position_location, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Merges the four directional fills into flow_tex with a single fullscreen
+// draw.
+//
+// temp_tex[0], temp_tex[1] and temp_tex[2] are sampled as the left, right
+// and up inputs, and flow_tex itself as the down input. flow_tex is also the
+// render target, with depth_tex attached as the depth buffer. width and
+// height give the viewport size.
+void HoleBlend::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height)
+{
+	glUseProgram(blend_program);
+
+	// Texture units 0..3 carry the left, right, up and down inputs, in that
+	// order.
+	const GLuint sampler_uniforms[4] = { uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex };
+	const GLuint source_textures[4] = { temp_tex[0], temp_tex[1], temp_tex[2], flow_tex };
+	for (int unit = 0; unit < 4; ++unit) {
+		bind_sampler(blend_program, sampler_uniforms[unit], unit, source_textures[unit], nearest_sampler);
+	}
+
+	// Same Z as the last (down) pass of HoleFill, and no sample displacement.
+	glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
+	glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+	glEnable(GL_DEPTH_TEST);
+	glDepthFunc(GL_LEQUAL);  // Skip over all of the pixels that were never holes to begin with.
+	glBindVertexArray(blend_vao);
+
+	// FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
+	GLuint fbo;
+	glCreateFramebuffers(1, &fbo);
+	glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);  // NOTE: Reading and writing to the same texture.
+	glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
+	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	glDisable(GL_DEPTH_TEST);
+
+	glDeleteFramebuffers(1, &fbo);
+}
+