git.sesse.net Git - nageru/blob - motion_search.frag

   1 #version 450 core
   2
   3 /*
   4   The motion search is one of the two major components of DIS. It works more or less
   5   like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
   6   a grid, and for each patch, there's a search to try to find the most similar patch
   7   in the other frame.
   8
   9   Unlike in a typical video codec, the DIS patch search is based on gradient descent;
  10   conceptually, you start with an initial guess (the value from the previous level,
  11   or the zero flow for the very first level), subtract the reference (“template”)
  12   patch from the candidate, look at the gradient to see in what direction there is
  13   a lower difference, and then inch a bit toward that direction. (There is seemingly
  14   nothing like AdaM, Momentum or similar, but the searched value is only in two
  15   dimensions, so perhaps it doesn't matter as much then.)
  16
  17   DIS does a tweak to this concept. Since the procedure as outlined above requires
  18   computing the gradient of the candidate patch, it uses the reference patch as
  19   candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
  20   in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
  21   all, the two patches are supposed to be quite similar, so their surroundings and
  22   thus also gradients should also be quite similar.) It's not entirely clear whether
  23   this is still a win on GPU, where calculations are much cheaper, especially
  24   the way we parallelize the search, but we've kept it around for now.
  25
  26   The inverse search is explained and derived in the supplementary material of the
  27   paper, section A. Do note that there's a typo; the text under equation 9 claims
  28   that the matrix H is n x n (where presumably n is the patch size), while in reality,
  29   it's 2x2.
  30
  31   Our GPU parallellization is fairly dumb right now; we do one patch per fragment
  32   (ie., parallellize only over patches, not within each patch), which may not
  33   be optimal. In particular, in the initial level, we only have 40 patches,
  34   which is on the low side for a GPU, and the memory access patterns may also not
  35   be ideal.
  36  */
  37
  38 const uint patch_size = 12;
  39 const uint num_iterations = 16;
  40
  41 in vec2 flow_tc;
  42 in vec2 patch_bottom_left_texel;  // Center of bottom-left texel of patch.
  43 out vec2 out_flow;
  44
  45 uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex;
  46 uniform vec2 image_size, inv_image_size;
  47
  48 void main()
  49 {
  50         // Lock patch_bottom_left_texel to an integer, so that we never get
  51         // any bilinear artifacts for the gradient.
  52         vec2 base = round(patch_bottom_left_texel * image_size)
  53                 * inv_image_size;
  54
  55         // First, precompute the pseudo-Hessian for the template patch.
  56         // This is the part where we really save by the inverse search
  57         // (ie., we can compute it up-front instead of anew for each
  58         // patch).
  59         //
  60         //  H = sum(S^T S)
  61         //
  62         // where S is the gradient at each point in the patch. Note that
  63         // this is an outer product, so we get a (symmetric) 2x2 matrix,
  64         // not a scalar.
  65         mat2 H = mat2(0.0f);
  66         vec2 grad_sum = vec2(0.0f);  // Used for patch normalization.
  67         float template_sum = 0.0f;
  68         for (uint y = 0; y < patch_size; ++y) {
  69                 for (uint x = 0; x < patch_size; ++x) {
  70                         vec2 tc = base + uvec2(x, y) * inv_image_size;
  71                         vec2 grad = texture(grad0_tex, tc).xy;
  72                         H[0][0] += grad.x * grad.x;
  73                         H[1][1] += grad.y * grad.y;
  74                         H[0][1] += grad.x * grad.y;
  75
  76                         template_sum += texture(image0_tex, tc).x;
  77                         grad_sum += grad;
  78                 }
  79         }
  80         H[1][0] = H[0][1];
  81
  82         // Make sure we don't get a singular matrix even if e.g. the picture is
  83         // all black. (The paper doesn't mention this, but the reference code
  84         // does it, and it seems like a reasonable hack to avoid NaNs. With such
  85         // a H, we'll go out-of-bounds pretty soon, though.)
  86         if (determinant(H) < 1e-6) {
  87                 H[0][0] += 1e-6;
  88                 H[1][1] += 1e-6;
  89         }
  90
  91         mat2 H_inv = inverse(H);
  92
  93         // Fetch the initial guess for the flow. (We need the normalization step
  94         // because densification works by accumulating; see the comments on the
  95         // Densify class.)
  96         vec3 prev_flow = texture(flow_tex, flow_tc).xyz;
  97         vec2 initial_u;
  98         if (prev_flow.z < 1e-3) {
  99                 initial_u = vec2(0.0, 0.0);
 100         } else {
 101                 initial_u = prev_flow.xy / prev_flow.z;
 102         }
 103
 104         // Note: The flow is in OpenGL coordinates [0..1], but the calculations
 105         // generally come out in pixels since the gradient is in pixels,
 106         // so we need to convert at the end.
 107         vec2 u = initial_u;
 108
 109         for (uint i = 0; i < num_iterations; ++i) {
 110                 vec2 du = vec2(0.0, 0.0);
 111                 float warped_sum = 0.0f;
 112                 for (uint y = 0; y < patch_size; ++y) {
 113                         for (uint x = 0; x < patch_size; ++x) {
 114                                 vec2 tc = base + uvec2(x, y) * inv_image_size;
 115                                 vec2 grad = texture(grad0_tex, tc).xy;
 116                                 float t = texture(image0_tex, tc).x;
 117                                 float warped = texture(image1_tex, tc + u).x;
 118                                 du += grad * (warped - t);
 119                                 warped_sum += warped;
 120                         }
 121                 }
 122
 123                 // Subtract the mean for patch normalization. We've done our
 124                 // sums without subtracting the means (because we didn't know them
 125                 // beforehand), ie.:
 126                 //
 127                 //   sum(S^T * ((x + µ1) - (y + µ2))) = sum(S^T * (x - y)) + (µ1 – µ2) sum(S^T)
 128                 //
 129                 // which gives trivially
 130                 //
 131                 //   sum(S^T * (x - y)) = [what we calculated] - (µ1 - µ2) sum(S^T)
 132                 //
 133                 // so we can just subtract away the mean difference here.
 134                 du -= grad_sum * (warped_sum - template_sum) * (1.0 / (patch_size * patch_size));
 135
 136                 u += (H_inv * du) * inv_image_size;
 137         }
 138
 139         // Reject if we moved too far.
 140         if (length((u - initial_u) * image_size) > patch_size) {
 141                 u = initial_u;
 142         }
 143
 144         out_flow = u;
 145 }