Pack the gradients and image together into a single 32-bit texture; seems to help...

author Steinar H. Gunderson <sgunderson@bigfoot.com>

Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)

committer Steinar H. Gunderson <sgunderson@bigfoot.com>

Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)
diff --git a/flow.cpp b/flow.cpp

index 726e74fad73c1e844f96ee0d24c62571d782dee9..3f3ff90ee6da46af401cf4ec636fc1581fd021bd 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -439,7 +439,7 @@ private:
         GLuint motion_search_program;
  
         GLuint uniform_inv_image_size, uniform_inv_prev_level_size;
-       GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
+       GLuint uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
  };
  
  MotionSearch::MotionSearch()
@@ -450,7 +450,6 @@ MotionSearch::MotionSearch()
  
         uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
         uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
-       uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
         uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex");
         uniform_grad0_tex = glGetUniformLocation(motion_search_program, "grad0_tex");
         uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex");
@@ -460,9 +459,8 @@ void MotionSearch::exec(GLuint tex0_view, GLuint tex1_view, GLuint grad0_tex, GL
  {
         glUseProgram(motion_search_program);
  
-       bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
         bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
-       bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
+       bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, linear_sampler);
         bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
  
         glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
@@ -1035,7 +1033,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra
  
                 // Create a new texture; we could be fancy and render use a multi-level
                 // texture, but meh.
-               GLuint grad0_tex = pool.get_texture(GL_RG16F, level_width, level_height);
+               GLuint grad0_tex = pool.get_texture(GL_R32UI, level_width, level_height);
  
                 // Find the derivative.
                 {
diff --git a/motion_search.frag b/motion_search.frag

index 136d316ef4cfe43ac4e4604e12379cec81be9385..9ef607c3fae79146b3b5ab97ab5298d8456b9faa 100644 (file)
--- a/motion_search.frag
+++ b/motion_search.frag
@@ -42,12 +42,37 @@ in vec2 flow_tc;
  in vec2 patch_center;
  out vec3 out_flow;
  
-uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex;
+uniform sampler2D flow_tex, image1_tex;
+uniform usampler2D grad0_tex;  // Also contains image0.
  uniform vec2 inv_image_size, inv_prev_level_size;
  
+vec3 unpack_gradients(uint v)  // Decodes one packed texel into (x gradient, y gradient, pixel value); mirrors the bit layout built by pack_gradients() in sobel.frag.
+{
+       uint vi = v & 0xff;  // Bits 0..7: 8-bit pixel value.
+       uint xi = (v >> 8) & 0xfff;  // Bits 8..19: 12-bit x gradient.
+       uint yi = v >> 20;  // Bits 20..31: 12-bit y gradient (top bits, so no mask is needed).
+       vec3 r = vec3(xi * (1.0f / 4095.0f) - 0.5f, yi * (1.0f / 4095.0f) - 0.5f, vi * (1.0f / 255.0f));  // Gradients map back to [-0.5, 0.5], the pixel value to [0, 1].
+       return r;
+}
+
+// Fetches and unpacks the grad0_tex texel at tc. Note: The third component (z) is the actual pixel value; x and y are the gradients.
+vec3 get_gradients(vec2 tc)
+{
+       vec3 grad = unpack_gradients(texture(grad0_tex, tc).x);  // NOTE(review): grad0_tex is an integer texture (GL_R32UI in flow.cpp) bound with linear_sampler there; integer textures need nearest filtering -- confirm the sampler state.
+
+       // Zero gradients outside the image. (We'd do this with a sampler,
+       // but we want the repeat behavior for the actual texels, in the
+       // z channel.)
+       if (any(lessThan(tc, vec2(0.0f))) || any(greaterThan(tc, vec2(1.0f)))) {  // True as soon as either coordinate leaves [0, 1].
+               grad.xy = vec2(0.0f);  // Only the gradients are cleared; grad.z keeps the sampled pixel value.
+       }
+
+       return grad;
+}
+
  void main()
  {
-       vec2 image_size = textureSize(image0_tex, 0);
+       vec2 image_size = textureSize(grad0_tex, 0);
  
         // Lock the patch center to an integer, so that we never get
         // any bilinear artifacts for the gradient. (NOTE: This assumes an
@@ -71,13 +96,13 @@ void main()
         for (uint y = 0; y < patch_size; ++y) {
                 for (uint x = 0; x < patch_size; ++x) {
                         vec2 tc = base + uvec2(x, y) * inv_image_size;
-                       vec2 grad = texture(grad0_tex, tc).xy;
+                       vec3 grad = get_gradients(tc);
                         H[0][0] += grad.x * grad.x;
                         H[1][1] += grad.y * grad.y;
                         H[0][1] += grad.x * grad.y;
  
-                       template_sum += texture(image0_tex, tc).x;
-                       grad_sum += grad;
+                       template_sum += grad.z;  // The actual template pixel value.
+                       grad_sum += grad.xy;
                 }
         }
         H[1][0] = H[0][1];
@@ -105,10 +130,10 @@ void main()
                 for (uint y = 0; y < patch_size; ++y) {
                         for (uint x = 0; x < patch_size; ++x) {
                                 vec2 tc = base + uvec2(x, y) * inv_image_size;
-                               vec2 grad = texture(grad0_tex, tc).xy;
-                               float t = texture(image0_tex, tc).x;
+                               vec3 grad = get_gradients(tc);
+                               float t = grad.z;
                                 float warped = texture(image1_tex, tc + u_norm).x;
-                               du += grad * (warped - t);
+                               du += grad.xy * (warped - t);
                                 warped_sum += warped;
                         }
                 }
diff --git a/sobel.frag b/sobel.frag

index 90c6d8a93c895368b45cbb18480148cd5583559c..3066300dfec5f5ceb29e795bc8f779a4511a1855 100644 (file)
--- a/sobel.frag
+++ b/sobel.frag
@@ -1,10 +1,21 @@
  #version 450 core
  
  in vec2 tc;
-out vec2 gradients;
+out uint packed_gradients;
  
  uniform sampler2D tex;
  
+uint pack_gradients(float x, float y, float v)  // Packs x/y gradients (12 bits each) and pixel value v (8 bits) into one 32-bit fixed-point word.
+{
+       x = clamp(x, -0.5f, 0.5f);  // Keeps xi within 0..4095 so it cannot spill into the neighboring bit fields.
+       y = clamp(y, -0.5f, 0.5f);  // Same for yi.
+
+       uint vi = uint(round(v * 255.0f));  // NOTE(review): v is not clamped; assumes v is already in [0, 1] so vi fits in 8 bits -- confirm.
+       uint xi = uint(round((x + 0.5f) * 4095.0f));  // Bias to [0, 1], then scale to 12 bits. NOTE(review): 0.0 lands on 2047.5, so a zero gradient has no exact code.
+       uint yi = uint(round((y + 0.5f) * 4095.0f));
+       return vi | (xi << 8) | (yi << 20);  // Layout: bits 0..7 = v, bits 8..19 = x, bits 20..31 = y.
+}
+
  void main()
  {
         // There are two common Sobel filters, horizontal and vertical
@@ -36,10 +47,18 @@ void main()
         float right        = textureOffset(tex, tc, ivec2( 1,  0)).x;
         float bottom_right = textureOffset(tex, tc, ivec2( 1, -1)).x;
  
+       vec2 gradients;
         gradients.x = (top_right + 2.0f * right + bottom_right) - (top_left + 2.0f * left + bottom_left);
         gradients.y = (top_left + 2.0 * top + top_right) - (bottom_left + 2.0f * bottom + bottom_right);
  
         // Normalize so that we have a normalized unit of intensity levels per pixel.
         gradients.x *= 0.125;
         gradients.y *= 0.125;
+
+       // Also store the actual pixel value, so that we get it “for free”
+       // when we sample the gradients in motion_search.frag later.
+       float center = texture(tex, tc).x;
+
+       // Pack everything into a single 32-bit value, using simple fixed-point.
+       packed_gradients = pack_gradients(gradients.x, gradients.y, center);
  }
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Fri, 3 Aug 2018 18:53:36 +0000 (20:53 +0200)
flow.cpp		patch \| blob \| history
motion_search.frag		patch \| blob \| history
sobel.frag		patch \| blob \| history