#define NO_SDL_GLEXT 1
-
+
#define WIDTH 1280
#define HEIGHT 720
glBindBuffer(GL_ARRAY_BUFFER, 0);
return vbo;
-}
+}
+
+void bind_sampler(GLuint program, const char *uniform_name, GLuint texture_unit, GLuint tex, GLuint sampler)
+{
+ GLint location = glGetUniformLocation(program, uniform_name);
+ if (location == -1) {
+ return;
+ }
+
+ glBindTextureUnit(texture_unit, tex);
+ glBindSampler(texture_unit, sampler);
+ glProgramUniform1i(program, location, texture_unit);
+}
int main(void)
{
GLuint tex1 = load_texture("test1500.pgm", WIDTH, HEIGHT);
// Load shaders.
- GLuint motion_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+ GLuint motion_vs_obj = compile_shader(read_file("motion_search.vert"), GL_VERTEX_SHADER);
GLuint motion_fs_obj = compile_shader(read_file("motion_search.frag"), GL_FRAGMENT_SHADER);
GLuint motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
GLuint sobel_fs_obj = compile_shader(read_file("sobel.frag"), GL_FRAGMENT_SHADER);
GLuint sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
+ // Make some samplers.
+ GLuint nearest_sampler;
+ glCreateSamplers(1, &nearest_sampler);
+ glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+ GLuint linear_sampler;
+ glCreateSamplers(1, &linear_sampler);
+ glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+ GLuint mipmap_sampler;
+ glCreateSamplers(1, &mipmap_sampler);
+ glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
+ glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
// Coarsest level.
- int level_width = WIDTH >> coarsest_level;
- int level_height = HEIGHT >> coarsest_level;
+ int level = coarsest_level;
+ int level_width = WIDTH >> level;
+ int level_height = HEIGHT >> level;
float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
int width_patches = 1 + lrintf((level_width - patch_size_pixels) / patch_spacing_pixels);
int height_patches = 1 + lrintf((level_height - patch_size_pixels) / patch_spacing_pixels);
+ // Make sure we always read from the correct level; the chosen
+ // mipmapping could otherwise be rather unpredictable, especially
+ // during motion search.
+ GLuint tex0_view, tex1_view;
+ glGenTextures(1, &tex0_view);
+ glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_R8, level, 1, 0, 1);
+ glGenTextures(1, &tex1_view);
+ glTextureView(tex1_view, GL_TEXTURE_2D, tex1, GL_R8, level, 1, 0, 1);
+
// Compute gradients in every point, used for the motion search.
// The DIS paper doesn't actually mention how these are computed,
// but seemingly, a 3x3 Sobel operator is used here (at least in
// Create a new texture; we could be fancy and render use a multi-level
// texture, but meh.
- GLuint grad_tex;
- glCreateTextures(GL_TEXTURE_2D, 1, &grad_tex);
- glTextureStorage2D(grad_tex, 1, GL_RG16F, level_width, level_height);
+ GLuint grad0_tex;
+ glCreateTextures(GL_TEXTURE_2D, 1, &grad0_tex);
+ glTextureStorage2D(grad0_tex, 1, GL_RG16F, level_width, level_height);
- GLuint grad_fbo;
- glCreateFramebuffers(1, &grad_fbo);
- glNamedFramebufferTexture(grad_fbo, GL_COLOR_ATTACHMENT0, grad_tex, 0);
+ GLuint grad0_fbo;
+ glCreateFramebuffers(1, &grad0_fbo);
+ glNamedFramebufferTexture(grad0_fbo, GL_COLOR_ATTACHMENT0, grad0_tex, 0);
glUseProgram(sobel_program);
- glBindTextureUnit(0, tex0);
+ glBindTextureUnit(0, tex0_view);
+ glBindSampler(0, nearest_sampler);
glProgramUniform1i(sobel_program, glGetUniformLocation(sobel_program, "tex"), 0);
glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_width"), 1.0f / level_width);
glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_height"), 1.0f / level_height);
// Now finally draw.
glViewport(0, 0, level_width, level_height);
- glBindFramebuffer(GL_FRAMEBUFFER, grad_fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, grad0_fbo);
glUseProgram(sobel_program);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- glUseProgram(0);
- glBindFramebuffer(GL_FRAMEBUFFER, 0);
- glBindVertexArray(0);
+ // Motion search to find the initial flow.
+
+ // Create a flow texture, initialized to zero.
GLuint flow_tex;
glCreateTextures(GL_TEXTURE_2D, 1, &flow_tex);
glTextureStorage2D(flow_tex, 1, GL_RG16F, width_patches, height_patches);
+ // And an output flow texture. (Well, we could have used texture barriers,
+ // but I don't feel lucky today.)
+ GLuint flow_out_tex;
+ glCreateTextures(GL_TEXTURE_2D, 1, &flow_out_tex);
+ glTextureStorage2D(flow_out_tex, 1, GL_RG16F, width_patches, height_patches);
+
+ GLuint flow_fbo;
+ glCreateFramebuffers(1, &flow_fbo);
+ glNamedFramebufferTexture(flow_fbo, GL_COLOR_ATTACHMENT0, flow_out_tex, 0);
+
+ glUseProgram(motion_search_program);
+
+ bind_sampler(motion_search_program, "image0_tex", 0, tex0_view, nearest_sampler);
+ bind_sampler(motion_search_program, "image1_tex", 1, tex1_view, linear_sampler);
+ bind_sampler(motion_search_program, "grad0_tex", 2, grad0_tex, nearest_sampler);
+ bind_sampler(motion_search_program, "flow_tex", 3, flow_tex, nearest_sampler);
+
+ glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_width"), level_width);
+ glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_height"), level_height);
+ glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_width"), 1.0f / level_width);
+ glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_height"), 1.0f / level_height);
+
// printf("%d x %d patches on this level\n", width_patches, height_patches);
+
+ // Set up the VAO containing all the required position/texcoord data.
+ GLuint motion_search_vao;
+ glCreateVertexArrays(1, &motion_search_vao);
+ glBindVertexArray(motion_search_vao);
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+ position_attrib = glGetAttribLocation(motion_search_program, "position");
+ glEnableVertexArrayAttrib(motion_search_vao, position_attrib);
+ glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+ texcoord_attrib = glGetAttribLocation(motion_search_program, "texcoord");
+ glEnableVertexArrayAttrib(motion_search_vao, texcoord_attrib);
+ glVertexAttribPointer(texcoord_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+ glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+ // And draw.
+ glViewport(0, 0, width_patches, height_patches);
+ glBindFramebuffer(GL_FRAMEBUFFER, flow_fbo);
+ glUseProgram(motion_search_program);
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ fprintf(stderr, "err = %d\n", glGetError());
}
#version 450 core
-in vec2 tc;
+/*
+ The motion search is one of the two major components of DIS. It works more or less
+ like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
+ a grid, and for each patch, there's a search to try to find the most similar patch
+ in the other frame.
+
+ Unlike in a typical video codec, the DIS patch search is based on gradient descent;
+ conceptually, you start with an initial guess (the value from the previous level,
+ or the zero flow for the very first level), subtract the reference (“template”)
+ patch from the candidate, look at the gradient to see in what direction there is
+ a lower difference, and then inch a bit toward that direction. (There is seemingly
+ nothing like AdaM, Momentum or similar, but the searched value is only in two
+ dimensions, so perhaps it doesn't matter as much then.)
+
+ DIS does a tweak to this concept. Since the procedure as outlined above requires
+ computing the gradient of the candidate patch, it uses the reference patch as
+ candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
+ in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
+ all, the two patches are supposed to be quite similar, so their surroundings and
+ thus also gradients should also be quite similar.) It's not entirely clear whether
+ this is still a win on GPU, where calculations are much cheaper, especially
+ the way we parallelize the search, but we've kept it around for now.
+
+ The inverse search is explained and derived in the supplementary material of the
+ paper, section A. Do note that there's a typo; the text under equation 9 claims
+ that the matrix H is n x n (where presumably n is the patch size), while in reality,
+ it's 2x2.
+
+ Our GPU parallellization is fairly dumb right now; we do one patch per fragment
+ (ie., parallellize only over patches, not within each patch), which may not
+ be optimal. In particular, in the initial level, we only have 40 patches,
+ which is on the low side for a GPU, and the memory access patterns may also not
+ be ideal.
+ */
+
+const uint patch_size = 12;
+const uint num_iterations = 16;
+
+in vec2 flow_tc;
+in vec2 patch_bottom_left_texel; // Center of bottom-left texel of patch.
out vec2 out_flow;
-uniform sampler2D tex;
+uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex;
+uniform float image_width, image_height, inv_image_width, inv_image_height;
void main()
{
-// out_flow = texture(tex, tc).xy;
- out_flow = tc.xy;
+ // Lock patch_bottom_left_texel to an integer, so that we never get
+ // any bilinear artifacts for the gradient.
+ vec2 base = round(patch_bottom_left_texel * vec2(image_width, image_height))
+ * vec2(inv_image_width, inv_image_height);
+
+ // First, precompute the pseudo-Hessian for the template patch.
+ // This is the part where we really save by the inverse search
+ // (ie., we can compute it up-front instead of anew for each
+ // patch).
+ //
+ // H = sum(S^T S)
+ //
+ // where S is the gradient at each point in the patch. Note that
+ // this is an outer product, so we get a (symmetric) 2x2 matrix,
+ // not a scalar.
+ mat2 H = mat2(0.0f);
+ for (uint y = 0; y < patch_size; ++y) {
+ for (uint x = 0; x < patch_size; ++x) {
+ vec2 tc;
+ tc.x = base.x + x * inv_image_width;
+ tc.y = base.y + y * inv_image_height;
+ vec2 grad = texture(grad0_tex, tc).xy;
+ H[0][0] += grad.x * grad.x;
+ H[1][1] += grad.y * grad.y;
+ H[0][1] += grad.x * grad.y;
+ }
+ }
+ H[1][0] = H[0][1];
+
+ // Make sure we don't get a singular matrix even if e.g. the picture is
+ // all black. (The paper doesn't mention this, but the reference code
+ // does it, and it seems like a reasonable hack to avoid NaNs. With such
+ // a H, we'll go out-of-bounds pretty soon, though.)
+ if (determinant(H) < 1e-6) {
+ H[0][0] += 1e-6;
+ H[1][1] += 1e-6;
+ }
+
+ mat2 H_inv = inverse(H);
+
+ // Fetch the initial guess for the flow.
+ vec2 initial_u = texture(flow_tex, flow_tc).xy;
+ vec2 u = initial_u;
+
+ for (uint i = 0; i < num_iterations; ++i) {
+ vec2 du = vec2(0.0, 0.0);
+ for (uint y = 0; y < patch_size; ++y) {
+ for (uint x = 0; x < patch_size; ++x) {
+ vec2 tc;
+ tc.x = base.x + x * inv_image_width;
+ tc.y = base.y + y * inv_image_height;
+ vec2 grad = texture(grad0_tex, tc).xy;
+ float t = texture(image0_tex, tc).x;
+ float warped = texture(image1_tex, tc + u).x;
+ du += grad * (warped - t);
+ }
+ }
+ u += H_inv * du * vec2(inv_image_width, inv_image_height);
+ }
+
+ // TODO: reject if moving too far
+
+ out_flow = u;
}