git.sesse.net Git - nageru/blobdiff - flow.cpp
Make PersistentFBOSet handle depth.
[nageru] / flow.cpp
index bda8eecfc162ff18bd4db86210a79729036204a2..cb747c91762b9692432966e27e2350a7c15e160d 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -47,6 +47,7 @@ constexpr unsigned patch_size_pixels = 12;
 float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
 
 bool enable_timing = true;
+bool detailed_timing = false;
 bool enable_variational_refinement = true;  // Just for debugging.
 bool enable_interpolation = false;
 
@@ -267,19 +268,19 @@ public:
        void render_to(const array<GLuint, num_elements> &textures);
 
        // Convenience wrappers.
-       void render_to(GLuint texture0, enable_if<num_elements == 1> * = nullptr) {
+       void render_to(GLuint texture0) {
                render_to({{texture0}});
        }
 
-       void render_to(GLuint texture0, GLuint texture1, enable_if<num_elements == 2> * = nullptr) {
+       void render_to(GLuint texture0, GLuint texture1) {
                render_to({{texture0, texture1}});
        }
 
-       void render_to(GLuint texture0, GLuint texture1, GLuint texture2, enable_if<num_elements == 3> * = nullptr) {
+       void render_to(GLuint texture0, GLuint texture1, GLuint texture2) {
                render_to({{texture0, texture1, texture2}});
        }
 
-       void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3, enable_if<num_elements == 4> * = nullptr) {
+       void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
                render_to({{texture0, texture1, texture2, texture3}});
        }
 
@@ -310,6 +311,59 @@ void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements>
        glBindFramebuffer(GL_FRAMEBUFFER, fbo);
 }
 
+// Same as PersistentFBOSet, but every FBO also gets a depth texture attached (as GL_DEPTH_ATTACHMENT).
+template<size_t num_elements>
+class PersistentFBOSetWithDepth {
+public:
+       void render_to(GLuint depth_tex, const array<GLuint, num_elements> &textures);  // Binds the FBO for this (depth, color textures) combination, creating and caching it on first use.
+
+       // Convenience wrappers: pack the color textures into an array and forward to the overload above. NOTE(review): nothing here checks that the texture count equals num_elements.
+       void render_to(GLuint depth_tex, GLuint texture0) {
+               render_to(depth_tex, {{texture0}});
+       }
+
+       void render_to(GLuint depth_tex, GLuint texture0, GLuint texture1) {
+               render_to(depth_tex, {{texture0, texture1}});
+       }
+
+       void render_to(GLuint depth_tex, GLuint texture0, GLuint texture1, GLuint texture2) {
+               render_to(depth_tex, {{texture0, texture1, texture2}});
+       }
+
+       void render_to(GLuint depth_tex, GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
+               render_to(depth_tex, {{texture0, texture1, texture2, texture3}});
+       }
+
+private:
+       // Cache of created FBO names, keyed on (depth texture, color textures). TODO: Delete these on destruction.
+       map<pair<GLuint, array<GLuint, num_elements>>, GLuint> fbos;
+};
+
+template<size_t num_elements>  // Binds an FBO for (depth_tex, textures) as the current framebuffer, creating and caching it on first use.
+void PersistentFBOSetWithDepth<num_elements>::render_to(GLuint depth_tex, const array<GLuint, num_elements> &textures)
+{
+       auto key = make_pair(depth_tex, textures);  // Cache key: the complete set of attachments.
+       // Reuse the FBO if one was already created for this exact combination.
+       auto it = fbos.find(key);
+       if (it != fbos.end()) {
+               glBindFramebuffer(GL_FRAMEBUFFER, it->second);
+               return;
+       }
+       // Otherwise, create a new FBO: depth_tex as the depth attachment, textures[i] as color attachment i (all at mip level 0).
+       GLuint fbo;
+       glCreateFramebuffers(1, &fbo);
+       GLenum bufs[num_elements];
+       glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
+       for (size_t i = 0; i < num_elements; ++i) {
+               glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+               bufs[i] = GL_COLOR_ATTACHMENT0 + i;
+       }
+       glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);  // Route fragment outputs to all of the color attachments.
+       // Remember the FBO for subsequent calls, and leave it bound as the current framebuffer.
+       fbos[key] = fbo;
+       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+}
+
 // Convert RGB to grayscale, using Rec. 709 coefficients.
 class GrayscaleConversion {
 public:
@@ -800,7 +854,7 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex
 class SOR {
 public:
        SOR();
-       void exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations);
+       void exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations, ScopedTimer *sor_timer);
 
 private:
        PersistentFBOSet<1> fbos;
@@ -838,7 +892,7 @@ SOR::SOR()
        uniform_phase = glGetUniformLocation(sor_program, "phase");
 }
 
-void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
+void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations, ScopedTimer *sor_timer)
 {
        glUseProgram(sor_program);
 
@@ -857,14 +911,20 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_te
        fbos.render_to(diff_flow_tex);
 
        for (int i = 0; i < num_iterations; ++i) {
-               glProgramUniform1i(sor_program, uniform_phase, 0);
-               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-               glTextureBarrier();
-               glProgramUniform1i(sor_program, uniform_phase, 1);
-               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-               if (i != num_iterations - 1) {
+               {
+                       ScopedTimer timer("Red pass", sor_timer);
+                       glProgramUniform1i(sor_program, uniform_phase, 0);
+                       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
                        glTextureBarrier();
                }
+               {
+                       ScopedTimer timer("Black pass", sor_timer);
+                       glProgramUniform1i(sor_program, uniform_phase, 1);
+                       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+                       if (i != num_iterations - 1) {
+                               glTextureBarrier();
+                       }
+               }
        }
 }
 
@@ -1066,7 +1126,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra
        ScopedTimer total_timer("Total", &timers);
        for (int level = coarsest_level; level >= int(finest_level); --level) {
                char timer_name[256];
-               snprintf(timer_name, sizeof(timer_name), "Level %d", level);
+               snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
                ScopedTimer level_timer(timer_name, &total_timer);
 
                int level_width = width >> level;
@@ -1190,7 +1250,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra
                        // Note that these are to/from the same texture.
                        {
                                ScopedTimer timer("SOR", &varref_timer);
-                               sor.exec(du_dv_tex, equation_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, 5);
+                               sor.exec(du_dv_tex, equation_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, 5, &timer);
                        }
                }
 
@@ -1245,7 +1305,7 @@ public:
        void exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint flow_tex, GLuint depth_tex, int width, int height, float alpha);
 
 private:
-       PersistentFBOSet<2> fbos;
+       PersistentFBOSetWithDepth<1> fbos;
 
        GLuint splat_vs_obj;
        GLuint splat_fs_obj;
@@ -1303,12 +1363,7 @@ void Splat::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backw
        glDepthFunc(GL_LESS);  // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.)
        glBindVertexArray(splat_vao);
 
-       // FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
-       GLuint fbo;
-       glCreateFramebuffers(1, &fbo);
-       glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);
-       glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
-       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+       fbos.render_to(depth_tex, flow_tex);
 
        // Do forward splatting.
        bind_sampler(splat_program, uniform_flow_tex, 2, forward_flow_tex, nearest_sampler);
@@ -1321,8 +1376,6 @@ void Splat::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backw
        glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height);
 
        glDisable(GL_DEPTH_TEST);
-
-       glDeleteFramebuffers(1, &fbo);
 }
 
 // Doing good and fast hole-filling on a GPU is nontrivial. We choose an option
@@ -1348,7 +1401,7 @@ public:
        void exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height);
 
 private:
-       PersistentFBOSet<2> fbos;
+       PersistentFBOSetWithDepth<1> fbos;
 
        GLuint fill_vs_obj;
        GLuint fill_fs_obj;
@@ -1393,12 +1446,7 @@ void HoleFill::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int w
        glDepthFunc(GL_LESS);  // Only update the values > 0.999f (ie., only invalid pixels).
        glBindVertexArray(fill_vao);
 
-       // FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
-       GLuint fbo;
-       glCreateFramebuffers(1, &fbo);
-       glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);  // NOTE: Reading and writing to the same texture.
-       glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
-       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+       fbos.render_to(depth_tex, flow_tex);  // NOTE: Reading and writing to the same texture.
 
        // Fill holes from the left, by shifting 1, 2, 4, 8, etc. pixels to the right.
        for (int offs = 1; offs < width; offs *= 2) {
@@ -1436,8 +1484,6 @@ void HoleFill::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int w
        }
 
        glDisable(GL_DEPTH_TEST);
-
-       glDeleteFramebuffers(1, &fbo);
 }
 
 // Blend the four directions from HoleFill into one pixel, so that single-pixel
@@ -1449,7 +1495,7 @@ public:
        void exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int width, int height);
 
 private:
-       PersistentFBOSet<2> fbos;
+       PersistentFBOSetWithDepth<1> fbos;
 
        GLuint blend_vs_obj;
        GLuint blend_fs_obj;
@@ -1501,18 +1547,11 @@ void HoleBlend::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int
        glDepthFunc(GL_LEQUAL);  // Skip over all of the pixels that were never holes to begin with.
        glBindVertexArray(blend_vao);
 
-       // FIXME: Get this into FBOSet, so we can reuse FBOs across frames.
-       GLuint fbo;
-       glCreateFramebuffers(1, &fbo);
-       glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0, flow_tex, 0);  // NOTE: Reading and writing to the same texture.
-       glNamedFramebufferTexture(fbo, GL_DEPTH_ATTACHMENT, depth_tex, 0);
-       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+       fbos.render_to(depth_tex, flow_tex);  // NOTE: Reading and writing to the same texture.
 
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 
        glDisable(GL_DEPTH_TEST);
-
-       glDeleteFramebuffers(1, &fbo);
 }
 
 class Blend {
@@ -1974,6 +2013,7 @@ int main(int argc, char **argv)
                { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
                { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
                { "disable-timing", no_argument, 0, 1000 },
+               { "detailed-timing", no_argument, 0, 1003 },
                { "ignore-variational-refinement", no_argument, 0, 1001 },  // Still calculates it, just doesn't apply it.
                { "interpolate", no_argument, 0, 1002 }
        };
@@ -2004,6 +2044,9 @@ int main(int argc, char **argv)
                case 1002:
                        enable_interpolation = true;
                        break;
+               case 1003:
+                       detailed_timing = true;
+                       break;
                default:
                        fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
                        exit(1);
@@ -2031,6 +2074,8 @@ int main(int argc, char **argv)
        SDL_GLContext context = SDL_GL_CreateContext(window);
        assert(context != nullptr);
 
+       glDisable(GL_DITHER);
+
        // FIXME: Should be part of DISComputeFlow (but needs to be initialized
        // before all the render passes).
        float vertices[] = {