git.sesse.net Git - nageru/commitdiff
Start parametrizing the operating points for DIS.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Thu, 23 Aug 2018 22:12:50 +0000 (00:12 +0200)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 15 Sep 2018 17:39:49 +0000 (19:39 +0200)
flow.cpp
flow.h

index 81130e27335f58252dde1eb2d7a7de2de33c162f..95bb1547b64b1095559632bd798a5f5fc9c8fe81 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -32,12 +32,6 @@ using namespace std;
 
 SDL_Window *window;
 
-// Operating point 3 (10 Hz on CPU, excluding preprocessing).
-constexpr float patch_overlap_ratio = 0.75f;
-constexpr unsigned coarsest_level = 5;
-constexpr unsigned finest_level = 1;
-constexpr unsigned patch_size_pixels = 12;
-
 // Weighting constants for the different parts of the variational refinement.
 // These don't correspond 1:1 to the values given in the DIS paper,
 // since we have different normalizations and ranges in some cases.
@@ -45,7 +39,7 @@ constexpr unsigned patch_size_pixels = 12;
 // although the error (EPE) seems to be fairly insensitive to the precise values.
 // Only the relative values matter, so we fix alpha (the smoothness constant)
 // at unity and tweak the others.
-float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
+static float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
 
 bool enable_timing = true;
 bool detailed_timing = false;
@@ -363,7 +357,8 @@ void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuin
        glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
 }
 
-Densify::Densify()
+Densify::Densify(const OperatingPoint &op)
+       : op(op)
 {
        densify_vs_obj = compile_shader(read_file("densify.vert"), GL_VERTEX_SHADER);
        densify_fs_obj = compile_shader(read_file("densify.frag"), GL_FRAGMENT_SHADER);
@@ -382,8 +377,8 @@ void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int
        bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
 
        glProgramUniform2f(densify_program, uniform_patch_size,
-               float(patch_size_pixels) / level_width,
-               float(patch_size_pixels) / level_height);
+               float(op.patch_size_pixels) / level_width,
+               float(op.patch_size_pixels) / level_height);
 
        glViewport(0, 0, level_width, level_height);
        glEnable(GL_BLEND);
@@ -613,8 +608,8 @@ void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int inpu
        glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
 }
 
-DISComputeFlow::DISComputeFlow(int width, int height)
-       : width(width), height(height)
+DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
+       : width(width), height(height), op(op), densify(op)
 {
        // Make some samplers.
        glCreateSamplers(1, &nearest_sampler);
@@ -676,14 +671,14 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra
        glBindVertexArray(vao);
 
        ScopedTimer total_timer("Compute flow", &timers);
-       for (int level = coarsest_level; level >= int(finest_level); --level) {
+       for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
                char timer_name[256];
                snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
                ScopedTimer level_timer(timer_name, &total_timer);
 
                int level_width = width >> level;
                int level_height = height >> level;
-               float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
+               float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
 
                // Make sure we have patches at least every Nth pixel, e.g. for width=9
                // and patch_spacing=3 (the default), we put out patch centers in
@@ -812,7 +807,7 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra
                //
                // Disabling this doesn't save any time (although we could easily make it so that
                // it is more efficient), but it helps debug the motion search.
-               if (enable_variational_refinement) {
+               if (op.variational_refinement) {
                        ScopedTimer timer("Add differential flow", &varref_timer);
                        add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
                }
@@ -832,7 +827,7 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra
        }
 
        // Scale up the flow to the final size (if needed).
-       if (finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
+       if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
                return prev_level_flow_tex;
        } else {
                GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
@@ -842,7 +837,8 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra
        }
 }
 
-Splat::Splat()
+Splat::Splat(const OperatingPoint &op)
+       : op(op)
 {
        splat_vs_obj = compile_shader(read_file("splat.vert"), GL_VERTEX_SHADER);
        splat_fs_obj = compile_shader(read_file("splat.frag"), GL_FRAGMENT_SHADER);
@@ -862,12 +858,7 @@ void Splat::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_te
        bind_sampler(splat_program, uniform_image_tex, 0, image_tex, linear_sampler);
        bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
 
-       // FIXME: This is set to 1.0 right now so not to trigger Haswell's “PMA stall”.
-       // Move to 2.0 later, or even 4.0.
-       // (Since we have hole filling, it's not critical, but larger values seem to do
-       // better than hole filling for large motion, blurs etc.)
-       float splat_size = 1.0f;  // 4x4 splat means 16x overdraw, 2x2 splat means 4x overdraw.
-       glProgramUniform2f(splat_program, uniform_splat_size, splat_size / width, splat_size / height);
+       glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
        glProgramUniform1f(splat_program, uniform_alpha, alpha);
        glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
 
@@ -1016,8 +1007,8 @@ void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int level
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
-Interpolate::Interpolate(int width, int height, int flow_level)
-       : width(width), height(height), flow_level(flow_level) {
+Interpolate::Interpolate(int width, int height, const OperatingPoint &op)
+       : width(width), height(height), flow_level(op.finest_level), op(op), splat(op) {
        // Set up the vertex data that will be shared between all passes.
        float vertices[] = {
                0.0f, 1.0f,
@@ -1337,7 +1328,11 @@ void compute_flow_only(int argc, char **argv, int optind)
        gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
        glGenerateTextureMipmap(tex_gray);
 
-       DISComputeFlow compute_flow(width1, height1);
+       OperatingPoint op = operating_point3;
+       if (!enable_variational_refinement) {
+               op.variational_refinement = false;
+       }
+       DISComputeFlow compute_flow(width1, height1, op);
 
        if (enable_warmup) {
                in_warmup = true;
@@ -1438,9 +1433,13 @@ void interpolate_image(int argc, char **argv, int optind)
                spare_pbos.push(pbos[i]);
        }
 
-       DISComputeFlow compute_flow(width1, height1);
+       OperatingPoint op = operating_point3;
+       if (!enable_variational_refinement) {
+               op.variational_refinement = false;
+       }
+       DISComputeFlow compute_flow(width1, height1, op);
        GrayscaleConversion gray;
-       Interpolate interpolate(width1, height1, finest_level);
+       Interpolate interpolate(width1, height1, op);
 
        GLuint tex_gray;
        glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
diff --git a/flow.h b/flow.h
index d8a80a4f9c324467523e5842c9f5a0a7293d24ef..0a6ea203c31517b9624cb8bb732e7a81136ac93f 100644 (file)
--- a/flow.h
+++ b/flow.h
@@ -2,7 +2,8 @@
 #define _FLOW_H 1
 
 // Code for computing optical flow between two images, and using it to interpolate
-// in-between frames. The main user interface is the Interpolate class.
+// in-between frames. The main user interface is the DISComputeFlow and Interpolate
+// classes (also GrayscaleConversion can be useful).
 
 #include <stdint.h>
 #include <epoxy/gl.h>
 
 class ScopedTimer;
 
+// Parameters for one operating point; the predefined operating points from the paper follow below.
+struct OperatingPoint {
+       unsigned coarsest_level;  // TODO: Adjust dynamically based on the resolution?
+       unsigned finest_level;
+       unsigned search_iterations;  // TODO: Not implemented yet! Halved from the paper.
+       unsigned patch_size_pixels;  // TODO: Not implemented in the shader yet!
+       float patch_overlap_ratio;
+       bool variational_refinement;  // TODO: Actually disabling this is not implemented yet!
+
+       // Not part of the original paper; used for interpolation.
+       // NOTE: Values much larger than 1.0 seem to trigger Haswell's “PMA stall”;
+       // the problem is not present on Broadwell and higher (there's a mitigation
+       // in the hardware, but Mesa doesn't enable it at the time of writing).
+       // Since we have hole filling, the holes from 1.0 are not critical,
+       // but larger values seem to do better than hole filling for large
+       // motion, blurs etc. since we have more candidates.
+       float splat_size;
+};
+
+// Operating point 1 (600 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point1 = {
+       5,      // Coarsest level.
+       3,      // Finest level.
+       8,      // Search iterations.
+       8,      // Patch size (pixels).
+       0.30f,  // Overlap ratio.
+       false,  // Variational refinement.
+       1.0f    // Splat size (pixels).
+};
+
+// Operating point 2 (300 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point2 = {
+       5,      // Coarsest level.
+       3,      // Finest level.
+       6,      // Search iterations.
+       8,      // Patch size (pixels).
+       0.40f,  // Overlap ratio.
+       true,   // Variational refinement.
+       1.0f    // Splat size (pixels).
+};
+
+// Operating point 3 (10 Hz on CPU, excluding preprocessing).
+// This is the only one that has been thoroughly tested.
+static constexpr OperatingPoint operating_point3 = {
+       5,      // Coarsest level.
+       1,      // Finest level.
+       8,      // Search iterations.
+       12,     // Patch size (pixels).
+       0.75f,  // Overlap ratio.
+       true,   // Variational refinement.
+       4.0f    // Splat size (pixels).
+};
+
+// Operating point 4 (0.5 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point4 = {
+       5,      // Coarsest level.
+       0,      // Finest level.
+       128,    // Search iterations.
+       12,     // Patch size (pixels).
+       0.75f,  // Overlap ratio.
+       true,   // Variational refinement.
+       8.0f    // Splat size (pixels).
+};
+
 // A class that caches FBOs that render to a given set of textures.
 // It never frees anything, so it is only suitable for rendering to
 // the same (small) set of textures over and over again.
@@ -137,10 +202,11 @@ private:
 // weight in the B channel. Dividing R and G by B gives the normalized values.
 class Densify {
 public:
-       Densify();
+       Densify(const OperatingPoint &op);
        void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers);
 
 private:
+       OperatingPoint op;
        PersistentFBOSet<1> fbos;
 
        GLuint densify_vs_obj;
@@ -333,7 +399,7 @@ private:
 
 class DISComputeFlow {
 public:
-       DISComputeFlow(int width, int height);
+       DISComputeFlow(int width, int height, const OperatingPoint &op);
 
        enum FlowDirection {
                FORWARD,
@@ -358,6 +424,7 @@ private:
        GLuint initial_flow_tex;
        GLuint vertex_vbo, vao;
        TexturePool pool;
+       const OperatingPoint op;
 
        // The various passes.
        Sobel sobel;
@@ -376,12 +443,13 @@ private:
 // radius fills most of the holes.
 class Splat {
 public:
-       Splat();
+       Splat(const OperatingPoint &op);
 
        // alpha is the time of the interpolated frame (0..1).
        void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
 
 private:
+       const OperatingPoint op;
        PersistentFBOSetWithDepth<1> fbos;
 
        GLuint splat_vs_obj;
@@ -462,7 +530,7 @@ private:
 
 class Interpolate {
 public:
-       Interpolate(int width, int height, int flow_level);
+       Interpolate(int width, int height, const OperatingPoint &op);
 
        // Returns a texture that must be released with release_texture()
        // after use. image_tex must be a two-layer RGBA8 texture with mipmaps
@@ -477,6 +545,7 @@ private:
        int width, height, flow_level;
        GLuint vertex_vbo, vao;
        TexturePool pool;
+       const OperatingPoint op;
 
        Splat splat;
        HoleFill hole_fill;