Start implementing motion search.

author Steinar H. Gunderson <sgunderson@bigfoot.com>

Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)

committer Steinar H. Gunderson <sgunderson@bigfoot.com>

Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)
diff --git a/flow.cpp b/flow.cpp

index 3100c50a601c6c5fa6259785c6ee68d4ff667721..f4fbb74d67903c5ee1d422977b93636ae3026a79 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -1,5 +1,5 @@
  #define NO_SDL_GLEXT 1
-  
+
  #define WIDTH 1280
  #define HEIGHT 720
  
@@ -185,7 +185,19 @@ GLuint fill_vertex_attribute(GLuint vao, GLuint glsl_program_num, const string &
         glBindBuffer(GL_ARRAY_BUFFER, 0);
  
         return vbo;
-}      
+}
+
+void bind_sampler(GLuint program, const char *uniform_name, GLuint texture_unit, GLuint tex, GLuint sampler)
+{
+       GLint location = glGetUniformLocation(program, uniform_name);
+       if (location == -1) {
+               return;
+       }
+
+       glBindTextureUnit(texture_unit, tex);
+       glBindSampler(texture_unit, sampler);
+       glProgramUniform1i(program, location, texture_unit);
+}
  
  int main(void)
  {
@@ -215,7 +227,7 @@ int main(void)
         GLuint tex1 = load_texture("test1500.pgm", WIDTH, HEIGHT);
  
         // Load shaders.
-       GLuint motion_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+       GLuint motion_vs_obj = compile_shader(read_file("motion_search.vert"), GL_VERTEX_SHADER);
         GLuint motion_fs_obj = compile_shader(read_file("motion_search.frag"), GL_FRAGMENT_SHADER);
         GLuint motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
  
@@ -223,13 +235,45 @@ int main(void)
         GLuint sobel_fs_obj = compile_shader(read_file("sobel.frag"), GL_FRAGMENT_SHADER);
         GLuint sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
  
+       // Make some samplers.
+       GLuint nearest_sampler;
+       glCreateSamplers(1, &nearest_sampler);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+       GLuint linear_sampler;
+       glCreateSamplers(1, &linear_sampler);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+       GLuint mipmap_sampler;
+       glCreateSamplers(1, &mipmap_sampler);
+       glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
+       glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
         // Coarsest level.
-       int level_width = WIDTH >> coarsest_level;
-       int level_height = HEIGHT >> coarsest_level;
+       int level = coarsest_level;
+       int level_width = WIDTH >> level;
+       int level_height = HEIGHT >> level;
         float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
         int width_patches = 1 + lrintf((level_width - patch_size_pixels) / patch_spacing_pixels);
         int height_patches = 1 + lrintf((level_height - patch_size_pixels) / patch_spacing_pixels);
  
+       // Make sure we always read from the correct level; the chosen
+       // mipmapping could otherwise be rather unpredictable, especially
+       // during motion search.
+       GLuint tex0_view, tex1_view;
+       glGenTextures(1, &tex0_view);
+       glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_R8, level, 1, 0, 1);
+       glGenTextures(1, &tex1_view);
+       glTextureView(tex1_view, GL_TEXTURE_2D, tex1, GL_R8, level, 1, 0, 1);
+
         // Compute gradients in every point, used for the motion search.
         // The DIS paper doesn't actually mention how these are computed,
         // but seemingly, a 3x3 Sobel operator is used here (at least in
@@ -241,16 +285,17 @@ int main(void)
  
         // Create a new texture; we could be fancy and render use a multi-level
         // texture, but meh.
-       GLuint grad_tex;
-       glCreateTextures(GL_TEXTURE_2D, 1, &grad_tex);
-       glTextureStorage2D(grad_tex, 1, GL_RG16F, level_width, level_height);
+       GLuint grad0_tex;
+       glCreateTextures(GL_TEXTURE_2D, 1, &grad0_tex);
+       glTextureStorage2D(grad0_tex, 1, GL_RG16F, level_width, level_height);
  
-       GLuint grad_fbo;
-       glCreateFramebuffers(1, &grad_fbo);
-       glNamedFramebufferTexture(grad_fbo, GL_COLOR_ATTACHMENT0, grad_tex, 0);
+       GLuint grad0_fbo;
+       glCreateFramebuffers(1, &grad0_fbo);
+       glNamedFramebufferTexture(grad0_fbo, GL_COLOR_ATTACHMENT0, grad0_tex, 0);
  
         glUseProgram(sobel_program);
-       glBindTextureUnit(0, tex0);
+       glBindTextureUnit(0, tex0_view);
+       glBindSampler(0, nearest_sampler);
         glProgramUniform1i(sobel_program, glGetUniformLocation(sobel_program, "tex"), 0);
         glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_width"), 1.0f / level_width);
         glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_height"), 1.0f / level_height);
@@ -282,16 +327,62 @@ int main(void)
  
         // Now finally draw.
         glViewport(0, 0, level_width, level_height);
-       glBindFramebuffer(GL_FRAMEBUFFER, grad_fbo);
+       glBindFramebuffer(GL_FRAMEBUFFER, grad0_fbo);
         glUseProgram(sobel_program);
         glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-       glUseProgram(0);
-       glBindFramebuffer(GL_FRAMEBUFFER, 0);
-        glBindVertexArray(0);
  
+       // Motion search to find the initial flow.
+
+       // Create a flow texture, initialized to zero.
         GLuint flow_tex;
         glCreateTextures(GL_TEXTURE_2D, 1, &flow_tex);
         glTextureStorage2D(flow_tex, 1, GL_RG16F, width_patches, height_patches);
  
+       // And an output flow texture. (Well, we could have used texture barriers,
+       // but I don't feel lucky today.)
+       GLuint flow_out_tex;
+       glCreateTextures(GL_TEXTURE_2D, 1, &flow_out_tex);
+       glTextureStorage2D(flow_out_tex, 1, GL_RG16F, width_patches, height_patches);
+
+       GLuint flow_fbo;
+       glCreateFramebuffers(1, &flow_fbo);
+       glNamedFramebufferTexture(flow_fbo, GL_COLOR_ATTACHMENT0, flow_out_tex, 0);
+
+       glUseProgram(motion_search_program);
+
+       bind_sampler(motion_search_program, "image0_tex", 0, tex0_view, nearest_sampler);
+       bind_sampler(motion_search_program, "image1_tex", 1, tex1_view, linear_sampler);
+       bind_sampler(motion_search_program, "grad0_tex", 2, grad0_tex, nearest_sampler);
+       bind_sampler(motion_search_program, "flow_tex", 3, flow_tex, nearest_sampler);
+
+       glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_width"), level_width);
+       glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_height"), level_height);
+       glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_width"), 1.0f / level_width);
+       glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_height"), 1.0f / level_height);
+
  //     printf("%d x %d patches on this level\n", width_patches, height_patches);
+
+       // Set up the VAO containing all the required position/texcoord data.
+       GLuint motion_search_vao;
+        glCreateVertexArrays(1, &motion_search_vao);
+        glBindVertexArray(motion_search_vao);
+       glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+       position_attrib = glGetAttribLocation(motion_search_program, "position");
+       glEnableVertexArrayAttrib(motion_search_vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+       texcoord_attrib = glGetAttribLocation(motion_search_program, "texcoord");
+       glEnableVertexArrayAttrib(motion_search_vao, texcoord_attrib);
+       glVertexAttribPointer(texcoord_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+       glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+       // And draw.
+       glViewport(0, 0, width_patches, height_patches);
+       glBindFramebuffer(GL_FRAMEBUFFER, flow_fbo);
+       glUseProgram(motion_search_program);
+       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+       fprintf(stderr, "err = %d\n", glGetError());
  }
diff --git a/motion_search.frag b/motion_search.frag

index 9865afafd1e92b942e2558232b3eb705997176da..616e2080dc1084342dd1723c283ba9deab755fc9 100644 (file)
--- a/motion_search.frag
+++ b/motion_search.frag
@@ -1,12 +1,113 @@
  #version 450 core
  
-in vec2 tc;
+/*
+  The motion search is one of the two major components of DIS. It works more or less
+  like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
+  a grid, and for each patch, there's a search to try to find the most similar patch
+  in the other frame.
+
+  Unlike in a typical video codec, the DIS patch search is based on gradient descent;
+  conceptually, you start with an initial guess (the value from the previous level,
+  or the zero flow for the very first level), subtract the reference (“template”)
+  patch from the candidate, look at the gradient to see in what direction there is
+  a lower difference, and then inch a bit toward that direction. (There is seemingly
+  nothing like AdaM, Momentum or similar, but the searched value is only in two
+  dimensions, so perhaps it doesn't matter as much then.)
+
+  DIS does a tweak to this concept. Since the procedure as outlined above requires
+  computing the gradient of the candidate patch, it uses the reference patch as
+  candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
+  in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
+  all, the two patches are supposed to be quite similar, so their surroundings and
+  thus also gradients should also be quite similar.) It's not entirely clear whether
+  this is still a win on GPU, where calculations are much cheaper, especially
+  the way we parallelize the search, but we've kept it around for now.
+
+  The inverse search is explained and derived in the supplementary material of the
+  paper, section A. Do note that there's a typo; the text under equation 9 claims
+  that the matrix H is n x n (where presumably n is the patch size), while in reality,
+  it's 2x2.
+
+  Our GPU parallellization is fairly dumb right now; we do one patch per fragment
+  (ie., parallellize only over patches, not within each patch), which may not
+  be optimal. In particular, in the initial level, we only have 40 patches,
+  which is on the low side for a GPU, and the memory access patterns may also not
+  be ideal.
+ */
+
+const uint patch_size = 12;
+const uint num_iterations = 16;
+
+in vec2 flow_tc;
+in vec2 patch_bottom_left_texel;  // Center of bottom-left texel of patch.
  out vec2 out_flow;
  
-uniform sampler2D tex;
+uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex;
+uniform float image_width, image_height, inv_image_width, inv_image_height;
  
  void main()
  {
-//     out_flow = texture(tex, tc).xy;
-       out_flow = tc.xy;
+       // Lock patch_bottom_left_texel to an integer, so that we never get
+       // any bilinear artifacts for the gradient.
+       vec2 base = round(patch_bottom_left_texel * vec2(image_width, image_height))
+               * vec2(inv_image_width, inv_image_height);
+
+       // First, precompute the pseudo-Hessian for the template patch.
+       // This is the part where we really save by the inverse search
+       // (ie., we can compute it up-front instead of anew for each
+       // patch).
+       //
+       //  H = sum(S^T S)
+       //
+       // where S is the gradient at each point in the patch. Note that
+       // this is an outer product, so we get a (symmetric) 2x2 matrix,
+       // not a scalar.
+       mat2 H = mat2(0.0f);
+       for (uint y = 0; y < patch_size; ++y) {
+               for (uint x = 0; x < patch_size; ++x) {
+                       vec2 tc;
+                       tc.x = base.x + x * inv_image_width;
+                       tc.y = base.y + y * inv_image_height;
+                       vec2 grad = texture(grad0_tex, tc).xy;
+                       H[0][0] += grad.x * grad.x;
+                       H[1][1] += grad.y * grad.y;
+                       H[0][1] += grad.x * grad.y;
+               }
+       }
+       H[1][0] = H[0][1];
+
+       // Make sure we don't get a singular matrix even if e.g. the picture is
+       // all black. (The paper doesn't mention this, but the reference code
+       // does it, and it seems like a reasonable hack to avoid NaNs. With such
+       // a H, we'll go out-of-bounds pretty soon, though.)
+       if (determinant(H) < 1e-6) {
+               H[0][0] += 1e-6;
+               H[1][1] += 1e-6;
+       }
+
+       mat2 H_inv = inverse(H);
+
+       // Fetch the initial guess for the flow.
+       vec2 initial_u = texture(flow_tex, flow_tc).xy;
+       vec2 u = initial_u;
+
+       for (uint i = 0; i < num_iterations; ++i) {
+               vec2 du = vec2(0.0, 0.0);
+               for (uint y = 0; y < patch_size; ++y) {
+                       for (uint x = 0; x < patch_size; ++x) {
+                               vec2 tc;
+                               tc.x = base.x + x * inv_image_width;
+                               tc.y = base.y + y * inv_image_height;
+                               vec2 grad = texture(grad0_tex, tc).xy;
+                               float t = texture(image0_tex, tc).x;
+                               float warped = texture(image1_tex, tc + u).x;
+                               du += grad * (warped - t);
+                       }
+               }
+               u += H_inv * du * vec2(inv_image_width, inv_image_height);
+       }
+
+       // TODO: reject if moving too far
+
+       out_flow = u;
  }
diff --git a/motion_search.vert b/motion_search.vert

new file mode 100644 (file)

index 0000000..d08f499
--- /dev/null
+++ b/motion_search.vert
@@ -0,0 +1,24 @@
+#version 450 core
+
+in vec2 position;
+in vec2 texcoord;
+out vec2 flow_tc;
+out vec2 patch_bottom_left_texel;  // Center of bottom-left texel of patch.
+
+uniform float inv_flow_width, inv_flow_height;
+uniform float inv_image_width, inv_image_height;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       flow_tc = texcoord;
+
+       vec2 patch_bottom_left = texcoord - vec2(0.5, 0.5) * vec2(inv_flow_width, inv_flow_height);
+       patch_bottom_left_texel = patch_bottom_left + vec2(0.5, 0.5) * vec2(inv_image_width, inv_image_height);
+}
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Mon, 2 Jul 2018 22:03:45 +0000 (00:03 +0200)
flow.cpp		patch \| blob \| history
motion_search.frag		patch \| blob \| history
motion_search.vert	[new file with mode: 0644]	patch \| blob