Finally get SOR working.

[nageru] / flow.cpp
diff --git a/flow.cpp b/flow.cpp

index aedac9d5e63410f8c60a047d4903afb2722c787e..8b09c8840999148ee3c0d3a0375e0af6c833da08 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -37,7 +37,11 @@ constexpr unsigned patch_size_pixels = 12;
  // Weighting constants for the different parts of the variational refinement.
  // These don't correspond 1:1 to the values given in the DIS paper,
  // since we have different normalizations and ranges in some cases.
-float vr_gamma = 10.0f, vr_delta = 5.0f, vr_alpha = 10.0f;
+// These are found through a simple grid search on some MPI-Sintel data,
+// although the error (EPE) seems to be fairly insensitive to the precise values.
+// Only the relative values matter, so we fix alpha (the smoothness constant)
+// at unity and tweak the others.
+float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
  
  bool enable_timing = true;
  bool enable_variational_refinement = true;  // Just for debugging.
@@ -364,7 +368,7 @@ private:
         GLuint motion_search_program;
         GLuint motion_search_vao;
  
-       GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_flow_size, uniform_inv_prev_level_size;
+       GLuint uniform_image_size, uniform_inv_image_size, uniform_flow_size, uniform_inv_prev_level_size;
         GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
  };
  
@@ -385,7 +389,7 @@ MotionSearch::MotionSearch()
  
         uniform_image_size = glGetUniformLocation(motion_search_program, "image_size");
         uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
-       uniform_inv_flow_size = glGetUniformLocation(motion_search_program, "inv_flow_size");
+       uniform_flow_size = glGetUniformLocation(motion_search_program, "flow_size");
         uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
         uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
         uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex");
@@ -404,7 +408,7 @@ void MotionSearch::exec(GLuint tex0_view, GLuint tex1_view, GLuint grad0_tex, GL
  
         glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
         glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
-       glProgramUniform2f(motion_search_program, uniform_inv_flow_size, 1.0f / width_patches, 1.0f / height_patches);
+       glProgramUniform2f(motion_search_program, uniform_flow_size, width_patches, height_patches);
         glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
  
         glViewport(0, 0, width_patches, height_patches);
@@ -437,6 +441,7 @@ private:
  
         GLuint uniform_width_patches, uniform_patch_size, uniform_patch_spacing;
         GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
+       GLuint uniform_flow_size;
  };
  
  Densify::Densify()
@@ -460,6 +465,7 @@ Densify::Densify()
         uniform_image0_tex = glGetUniformLocation(densify_program, "image0_tex");
         uniform_image1_tex = glGetUniformLocation(densify_program, "image1_tex");
         uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
+       uniform_flow_size = glGetUniformLocation(densify_program, "flow_size");
  }
  
  void Densify::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches)
@@ -474,6 +480,9 @@ void Densify::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint d
         glProgramUniform2f(densify_program, uniform_patch_size,
                 float(patch_size_pixels) / level_width,
                 float(patch_size_pixels) / level_height);
+       glProgramUniform2f(densify_program, uniform_flow_size,
+               width_patches,
+               height_patches);
  
         float patch_spacing_x = float(level_width - patch_size_pixels) / (width_patches - 1);
         float patch_spacing_y = float(level_height - patch_size_pixels) / (height_patches - 1);
@@ -774,11 +783,12 @@ private:
         GLuint uniform_diff_flow_tex;
         GLuint uniform_equation_tex;
         GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+       GLuint uniform_image_size, uniform_phase;
  };
  
  SOR::SOR()
  {
-       sor_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+       sor_vs_obj = compile_shader(read_file("sor.vert"), GL_VERTEX_SHADER);
         sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER);
         sor_program = link_program(sor_vs_obj, sor_fs_obj);
  
@@ -795,6 +805,8 @@ SOR::SOR()
         uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex");
         uniform_smoothness_x_tex = glGetUniformLocation(sor_program, "smoothness_x_tex");
         uniform_smoothness_y_tex = glGetUniformLocation(sor_program, "smoothness_y_tex");
+       uniform_image_size = glGetUniformLocation(sor_program, "image_size");
+       uniform_phase = glGetUniformLocation(sor_program, "phase");
  }
  
  void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
@@ -806,12 +818,22 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_te
         bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
         bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
  
+       glProgramUniform2f(sor_program, uniform_image_size, level_width, level_height);
+
+       // NOTE: We render to the same texture that we sample from, but we never write any value
+       // that we read in the same shader pass (we call discard for red values when we compute
+       // black, and vice versa), and we have barriers between the passes, so we're fine
+       // as per the spec.
         glViewport(0, 0, level_width, level_height);
         glDisable(GL_BLEND);
         glBindVertexArray(sor_vao);
-       fbos.render_to(diff_flow_tex);  // NOTE: Bind to same as we render from!
+       fbos.render_to(diff_flow_tex);
  
         for (int i = 0; i < num_iterations; ++i) {
+               glProgramUniform1i(sor_program, uniform_phase, 0);
+               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+               glTextureBarrier();
+               glProgramUniform1i(sor_program, uniform_phase, 1);
                 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
                 if (i != num_iterations - 1) {
                         glTextureBarrier();
@@ -1097,8 +1119,13 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                 int level_width = width >> level;
                 int level_height = height >> level;
                 float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
-               int width_patches = 1 + lrintf((level_width - patch_size_pixels) / patch_spacing_pixels);
-               int height_patches = 1 + lrintf((level_height - patch_size_pixels) / patch_spacing_pixels);
+
+               // Make sure we have a patch at least every Nth pixel. For example, for width=9
+               // and patch_spacing=3 (the default), we place patch centers at
+               // x=0, x=3, x=6 and x=9, which is four patches. The fragment shader will
+               // lock all the centers to integer coordinates if needed.
+               int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
+               int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
  
                 // Make sure we always read from the correct level; the chosen
                 // mipmapping could otherwise be rather unpredictable, especially
@@ -1366,25 +1393,25 @@ void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename
  int main(int argc, char **argv)
  {
          static const option long_options[] = {
-                { "alpha", required_argument, 0, 'a' },
-                { "delta", required_argument, 0, 'd' },
-                { "gamma", required_argument, 0, 'g' },
+               { "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
+               { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
+               { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
                 { "disable-timing", no_argument, 0, 1000 },
                 { "ignore-variational-refinement", no_argument, 0, 1001 }  // Still calculates it, just doesn't apply it.
         };
  
         for ( ;; ) {
                 int option_index = 0;
-               int c = getopt_long(argc, argv, "a:d:g:", long_options, &option_index);
+               int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
  
                 if (c == -1) {
                         break;
                 }
                 switch (c) {
-               case 'a':
+               case 's':
                         vr_alpha = atof(optarg);
                         break;
-               case 'd':
+               case 'i':
                         vr_delta = atof(optarg);
                         break;
                 case 'g':