From 804409c84346fc01270a388e099b8a99528ed0c6 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 3 Jul 2018 00:03:45 +0200 Subject: [PATCH] Start implementing motion search. --- flow.cpp | 123 +++++++++++++++++++++++++++++++++++++++------ motion_search.frag | 109 +++++++++++++++++++++++++++++++++++++-- motion_search.vert | 24 +++++++++ 3 files changed, 236 insertions(+), 20 deletions(-) create mode 100644 motion_search.vert diff --git a/flow.cpp b/flow.cpp index 3100c50..f4fbb74 100644 --- a/flow.cpp +++ b/flow.cpp @@ -1,5 +1,5 @@ #define NO_SDL_GLEXT 1 - + #define WIDTH 1280 #define HEIGHT 720 @@ -185,7 +185,19 @@ GLuint fill_vertex_attribute(GLuint vao, GLuint glsl_program_num, const string & glBindBuffer(GL_ARRAY_BUFFER, 0); return vbo; -} +} + +void bind_sampler(GLuint program, const char *uniform_name, GLuint texture_unit, GLuint tex, GLuint sampler) +{ + GLint location = glGetUniformLocation(program, uniform_name); + if (location == -1) { + return; + } + + glBindTextureUnit(texture_unit, tex); + glBindSampler(texture_unit, sampler); + glProgramUniform1i(program, location, texture_unit); +} int main(void) { @@ -215,7 +227,7 @@ int main(void) GLuint tex1 = load_texture("test1500.pgm", WIDTH, HEIGHT); // Load shaders. - GLuint motion_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); + GLuint motion_vs_obj = compile_shader(read_file("motion_search.vert"), GL_VERTEX_SHADER); GLuint motion_fs_obj = compile_shader(read_file("motion_search.frag"), GL_FRAGMENT_SHADER); GLuint motion_search_program = link_program(motion_vs_obj, motion_fs_obj); @@ -223,13 +235,45 @@ int main(void) GLuint sobel_fs_obj = compile_shader(read_file("sobel.frag"), GL_FRAGMENT_SHADER); GLuint sobel_program = link_program(sobel_vs_obj, sobel_fs_obj); + // Make some samplers. + GLuint nearest_sampler; + glCreateSamplers(1, &nearest_sampler); + glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + GLuint linear_sampler; + glCreateSamplers(1, &linear_sampler); + glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + GLuint mipmap_sampler; + glCreateSamplers(1, &mipmap_sampler); + glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST); + glSamplerParameteri(mipmap_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(mipmap_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + // Coarsest level. - int level_width = WIDTH >> coarsest_level; - int level_height = HEIGHT >> coarsest_level; + int level = coarsest_level; + int level_width = WIDTH >> level; + int level_height = HEIGHT >> level; float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio); int width_patches = 1 + lrintf((level_width - patch_size_pixels) / patch_spacing_pixels); int height_patches = 1 + lrintf((level_height - patch_size_pixels) / patch_spacing_pixels); + // Make sure we always read from the correct level; the chosen + // mipmapping could otherwise be rather unpredictable, especially + // during motion search. + GLuint tex0_view, tex1_view; + glGenTextures(1, &tex0_view); + glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_R8, level, 1, 0, 1); + glGenTextures(1, &tex1_view); + glTextureView(tex1_view, GL_TEXTURE_2D, tex1, GL_R8, level, 1, 0, 1); + // Compute gradients in every point, used for the motion search. // The DIS paper doesn't actually mention how these are computed, // but seemingly, a 3x3 Sobel operator is used here (at least in @@ -241,16 +285,17 @@ int main(void) // Create a new texture; we could be fancy and render use a multi-level // texture, but meh. - GLuint grad_tex; - glCreateTextures(GL_TEXTURE_2D, 1, &grad_tex); - glTextureStorage2D(grad_tex, 1, GL_RG16F, level_width, level_height); + GLuint grad0_tex; + glCreateTextures(GL_TEXTURE_2D, 1, &grad0_tex); + glTextureStorage2D(grad0_tex, 1, GL_RG16F, level_width, level_height); - GLuint grad_fbo; - glCreateFramebuffers(1, &grad_fbo); - glNamedFramebufferTexture(grad_fbo, GL_COLOR_ATTACHMENT0, grad_tex, 0); + GLuint grad0_fbo; + glCreateFramebuffers(1, &grad0_fbo); + glNamedFramebufferTexture(grad0_fbo, GL_COLOR_ATTACHMENT0, grad0_tex, 0); glUseProgram(sobel_program); - glBindTextureUnit(0, tex0); + glBindTextureUnit(0, tex0_view); + glBindSampler(0, nearest_sampler); glProgramUniform1i(sobel_program, glGetUniformLocation(sobel_program, "tex"), 0); glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_width"), 1.0f / level_width); glProgramUniform1f(sobel_program, glGetUniformLocation(sobel_program, "inv_height"), 1.0f / level_height); @@ -282,16 +327,62 @@ int main(void) // Now finally draw. glViewport(0, 0, level_width, level_height); - glBindFramebuffer(GL_FRAMEBUFFER, grad_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, grad0_fbo); glUseProgram(sobel_program); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glUseProgram(0); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glBindVertexArray(0); + // Motion search to find the initial flow. + + // Create a flow texture, initialized to zero. GLuint flow_tex; glCreateTextures(GL_TEXTURE_2D, 1, &flow_tex); glTextureStorage2D(flow_tex, 1, GL_RG16F, width_patches, height_patches); + // And an output flow texture. (Well, we could have used texture barriers, + // but I don't feel lucky today.) + GLuint flow_out_tex; + glCreateTextures(GL_TEXTURE_2D, 1, &flow_out_tex); + glTextureStorage2D(flow_out_tex, 1, GL_RG16F, width_patches, height_patches); + + GLuint flow_fbo; + glCreateFramebuffers(1, &flow_fbo); + glNamedFramebufferTexture(flow_fbo, GL_COLOR_ATTACHMENT0, flow_out_tex, 0); + + glUseProgram(motion_search_program); + + bind_sampler(motion_search_program, "image0_tex", 0, tex0_view, nearest_sampler); + bind_sampler(motion_search_program, "image1_tex", 1, tex1_view, linear_sampler); + bind_sampler(motion_search_program, "grad0_tex", 2, grad0_tex, nearest_sampler); + bind_sampler(motion_search_program, "flow_tex", 3, flow_tex, nearest_sampler); + + glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_width"), level_width); + glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "image_height"), level_height); + glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_width"), 1.0f / level_width); + glProgramUniform1f(motion_search_program, glGetUniformLocation(motion_search_program, "inv_image_height"), 1.0f / level_height); + // printf("%d x %d patches on this level\n", width_patches, height_patches); + + // Set up the VAO containing all the required position/texcoord data. + GLuint motion_search_vao; + glCreateVertexArrays(1, &motion_search_vao); + glBindVertexArray(motion_search_vao); + glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); + + position_attrib = glGetAttribLocation(motion_search_program, "position"); + glEnableVertexArrayAttrib(motion_search_vao, position_attrib); + glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); + + texcoord_attrib = glGetAttribLocation(motion_search_program, "texcoord"); + glEnableVertexArrayAttrib(motion_search_vao, texcoord_attrib); + glVertexAttribPointer(texcoord_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + + // And draw. + glViewport(0, 0, width_patches, height_patches); + glBindFramebuffer(GL_FRAMEBUFFER, flow_fbo); + glUseProgram(motion_search_program); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + fprintf(stderr, "err = %d\n", glGetError()); } diff --git a/motion_search.frag b/motion_search.frag index 9865afa..616e208 100644 --- a/motion_search.frag +++ b/motion_search.frag @@ -1,12 +1,113 @@ #version 450 core -in vec2 tc; +/* + The motion search is one of the two major components of DIS. It works more or less + like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in + a grid, and for each patch, there's a search to try to find the most similar patch + in the other frame. + + Unlike in a typical video codec, the DIS patch search is based on gradient descent; + conceptually, you start with an initial guess (the value from the previous level, + or the zero flow for the very first level), subtract the reference (“template”) + patch from the candidate, look at the gradient to see in what direction there is + a lower difference, and then inch a bit toward that direction. (There is seemingly + nothing like AdaM, Momentum or similar, but the searched value is only in two + dimensions, so perhaps it doesn't matter as much then.) + + DIS does a tweak to this concept. Since the procedure as outlined above requires + computing the gradient of the candidate patch, it uses the reference patch as + candidate (thus the “inverse” name), and thus uses _its_ gradient to understand + in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after + all, the two patches are supposed to be quite similar, so their surroundings and + thus also gradients should also be quite similar.) It's not entirely clear whether + this is still a win on GPU, where calculations are much cheaper, especially + the way we parallelize the search, but we've kept it around for now. + + The inverse search is explained and derived in the supplementary material of the + paper, section A. Do note that there's a typo; the text under equation 9 claims + that the matrix H is n x n (where presumably n is the patch size), while in reality, + it's 2x2. + + Our GPU parallellization is fairly dumb right now; we do one patch per fragment + (ie., parallellize only over patches, not within each patch), which may not + be optimal. In particular, in the initial level, we only have 40 patches, + which is on the low side for a GPU, and the memory access patterns may also not + be ideal. + */ + +const uint patch_size = 12; +const uint num_iterations = 16; + +in vec2 flow_tc; +in vec2 patch_bottom_left_texel; // Center of bottom-left texel of patch. out vec2 out_flow; -uniform sampler2D tex; +uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex; +uniform float image_width, image_height, inv_image_width, inv_image_height; void main() { -// out_flow = texture(tex, tc).xy; - out_flow = tc.xy; + // Lock patch_bottom_left_texel to an integer, so that we never get + // any bilinear artifacts for the gradient. + vec2 base = round(patch_bottom_left_texel * vec2(image_width, image_height)) + * vec2(inv_image_width, inv_image_height); + + // First, precompute the pseudo-Hessian for the template patch. + // This is the part where we really save by the inverse search + // (ie., we can compute it up-front instead of anew for each + // patch). + // + // H = sum(S^T S) + // + // where S is the gradient at each point in the patch. Note that + // this is an outer product, so we get a (symmetric) 2x2 matrix, + // not a scalar. + mat2 H = mat2(0.0f); + for (uint y = 0; y < patch_size; ++y) { + for (uint x = 0; x < patch_size; ++x) { + vec2 tc; + tc.x = base.x + x * inv_image_width; + tc.y = base.y + y * inv_image_height; + vec2 grad = texture(grad0_tex, tc).xy; + H[0][0] += grad.x * grad.x; + H[1][1] += grad.y * grad.y; + H[0][1] += grad.x * grad.y; + } + } + H[1][0] = H[0][1]; + + // Make sure we don't get a singular matrix even if e.g. the picture is + // all black. (The paper doesn't mention this, but the reference code + // does it, and it seems like a reasonable hack to avoid NaNs. With such + // a H, we'll go out-of-bounds pretty soon, though.) + if (determinant(H) < 1e-6) { + H[0][0] += 1e-6; + H[1][1] += 1e-6; + } + + mat2 H_inv = inverse(H); + + // Fetch the initial guess for the flow. + vec2 initial_u = texture(flow_tex, flow_tc).xy; + vec2 u = initial_u; + + for (uint i = 0; i < num_iterations; ++i) { + vec2 du = vec2(0.0, 0.0); + for (uint y = 0; y < patch_size; ++y) { + for (uint x = 0; x < patch_size; ++x) { + vec2 tc; + tc.x = base.x + x * inv_image_width; + tc.y = base.y + y * inv_image_height; + vec2 grad = texture(grad0_tex, tc).xy; + float t = texture(image0_tex, tc).x; + float warped = texture(image1_tex, tc + u).x; + du += grad * (warped - t); + } + } + u += H_inv * du * vec2(inv_image_width, inv_image_height); + } + + // TODO: reject if moving too far + + out_flow = u; } diff --git a/motion_search.vert b/motion_search.vert new file mode 100644 index 0000000..d08f499 --- /dev/null +++ b/motion_search.vert @@ -0,0 +1,24 @@ +#version 450 core + +in vec2 position; +in vec2 texcoord; +out vec2 flow_tc; +out vec2 patch_bottom_left_texel; // Center of bottom-left texel of patch. + +uniform float inv_flow_width, inv_flow_height; +uniform float inv_image_width, inv_image_height; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + flow_tc = texcoord; + + vec2 patch_bottom_left = texcoord - vec2(0.5, 0.5) * vec2(inv_flow_width, inv_flow_height); + patch_bottom_left_texel = patch_bottom_left + vec2(0.5, 0.5) * vec2(inv_image_width, inv_image_height); +} -- 2.39.2