git.sesse.net Git - nageru/blob - futatabi/motion_search.frag

   1 #version 450 core
   2
   3 /*
   4   The motion search is one of the two major components of DIS. It works more or less
   5   like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
   6   a grid, and for each patch, there's a search to try to find the most similar patch
   7   in the other frame.
   8
   9   Unlike in a typical video codec, the DIS patch search is based on gradient descent;
  10   conceptually, you start with an initial guess (the value from the previous level,
  11   or the zero flow for the very first level), subtract the reference (“template”)
  12   patch from the candidate, look at the gradient to see in what direction there is
  13   a lower difference, and then inch a bit toward that direction. (There is seemingly
  14   nothing like AdaM, Momentum or similar, but the searched value is only in two
  15   dimensions, so perhaps it doesn't matter as much then.)
  16
  17   DIS does a tweak to this concept. Since the procedure as outlined above requires
  18   computing the gradient of the candidate patch, it uses the reference patch as
  19   candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
  20   in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
  21   all, the two patches are supposed to be quite similar, so their surroundings and
  22   thus also gradients should also be quite similar.) It's not entirely clear whether
  23   this is still a win on GPU, where calculations are much cheaper, especially
  24   the way we parallelize the search, but we've kept it around for now.
  25
  26   The inverse search is explained and derived in the supplementary material of the
  27   paper, section A. Do note that there's a typo; the text under equation 9 claims
  28   that the matrix H is n x n (where presumably n is the patch size), while in reality,
  29   it's 2x2.
  30
  31   Our GPU parallellization is fairly dumb right now; we do one patch per fragment
  32   (ie., parallellize only over patches, not within each patch), which may not
  33   be optimal. In particular, in the initial level, we only have 40 patches,
  34   which is on the low side for a GPU, and the memory access patterns may also not
  35   be ideal.
  36  */
  37
  38 in vec3 flow_tc;
  39 in vec2 patch_center;
  40 flat in int ref_layer, search_layer;
  41 out vec3 out_flow;
  42
  43 uniform sampler2DArray flow_tex, image_tex;
  44 uniform usampler2DArray grad_tex;  // Also contains the corresponding reference image.
  45 uniform vec2 inv_image_size, inv_prev_level_size;
  46 uniform uint patch_size;
  47 uniform uint num_iterations;
  48
  49 vec3 unpack_gradients(uint v)
  50 {
  51         uint vi = v & 0xffu;
  52         uint xi = (v >> 8) & 0xfffu;
  53         uint yi = v >> 20;
  54         vec3 r = vec3(xi * (1.0f / 4095.0f) - 0.5f, yi * (1.0f / 4095.0f) - 0.5f, vi * (1.0f / 255.0f));
  55         return r;
  56 }
  57
  58 // Note: The third variable is the actual pixel value.
  59 vec3 get_gradients(vec3 tc)
  60 {
  61         vec3 grad = unpack_gradients(texture(grad_tex, tc).x);
  62
  63         // Zero gradients outside the image. (We'd do this with a sampler,
  64         // but we want the repeat behavior for the actual texels, in the
  65         // z channel.)
  66         if (any(lessThan(tc.xy, vec2(0.0f))) || any(greaterThan(tc.xy, vec2(1.0f)))) {
  67                 grad.xy = vec2(0.0f);
  68         }
  69
  70         return grad;
  71 }
  72
  73 void main()
  74 {
  75         vec2 image_size = textureSize(grad_tex, 0).xy;
  76
  77         // Lock the patch center to an integer, so that we never get
  78         // any bilinear artifacts for the gradient. (NOTE: This assumes an
  79         // even patch size.) Then calculate the bottom-left texel of the patch.
  80         vec2 base = (round(patch_center * image_size) - (0.5f * patch_size - 0.5f))
  81                 * inv_image_size;
  82
  83         // First, precompute the pseudo-Hessian for the template patch.
  84         // This is the part where we really save by the inverse search
  85         // (ie., we can compute it up-front instead of anew for each
  86         // patch).
  87         //
  88         //  H = sum(S^T S)
  89         //
  90         // where S is the gradient at each point in the patch. Note that
  91         // this is an outer product, so we get a (symmetric) 2x2 matrix,
  92         // not a scalar.
  93         mat2 H = mat2(0.0f);
  94         vec2 grad_sum = vec2(0.0f);  // Used for patch normalization.
  95         float template_sum = 0.0f;
  96         for (uint y = 0; y < patch_size; ++y) {
  97                 for (uint x = 0; x < patch_size; ++x) {
  98                         vec2 tc = base + uvec2(x, y) * inv_image_size;
  99                         vec3 grad = get_gradients(vec3(tc, ref_layer));
 100                         H[0][0] += grad.x * grad.x;
 101                         H[1][1] += grad.y * grad.y;
 102                         H[0][1] += grad.x * grad.y;
 103
 104                         template_sum += grad.z;  // The actual template pixel value.
 105                         grad_sum += grad.xy;
 106                 }
 107         }
 108         H[1][0] = H[0][1];
 109
 110         // Make sure we don't get a singular matrix even if e.g. the picture is
 111         // all black. (The paper doesn't mention this, but the reference code
 112         // does it, and it seems like a reasonable hack to avoid NaNs. With such
 113         // a H, we'll go out-of-bounds pretty soon, though.)
 114         if (determinant(H) < 1e-6) {
 115                 H[0][0] += 1e-6;
 116                 H[1][1] += 1e-6;
 117         }
 118
 119         mat2 H_inv = inverse(H);
 120
 121         // Fetch the initial guess for the flow, and convert from the previous size to this one.
 122         vec2 initial_u = texture(flow_tex, flow_tc).xy * (image_size * inv_prev_level_size);
 123         vec2 u = initial_u;
 124         float mean_diff, first_mean_diff;
 125
 126         for (uint i = 0; i < num_iterations; ++i) {
 127                 vec2 du = vec2(0.0, 0.0);
 128                 float warped_sum = 0.0f;
 129                 vec2 u_norm = u * inv_image_size;  // In [0..1] coordinates instead of pixels.
 130                 for (uint y = 0; y < patch_size; ++y) {
 131                         for (uint x = 0; x < patch_size; ++x) {
 132                                 vec2 tc = base + uvec2(x, y) * inv_image_size;
 133                                 vec3 grad = get_gradients(vec3(tc, ref_layer));
 134                                 float t = grad.z;
 135                                 float warped = texture(image_tex, vec3(tc + u_norm, search_layer)).x;
 136                                 du += grad.xy * (warped - t);
 137                                 warped_sum += warped;
 138                         }
 139                 }
 140
 141                 // Subtract the mean for patch normalization. We've done our
 142                 // sums without subtracting the means (because we didn't know them
 143                 // beforehand), ie.:
 144                 //
 145                 //   sum(S^T * ((x + µ1) - (y + µ2))) = sum(S^T * (x - y)) + (µ1 – µ2) sum(S^T)
 146                 //
 147                 // which gives trivially
 148                 //
 149                 //   sum(S^T * (x - y)) = [what we calculated] - (µ1 - µ2) sum(S^T)
 150                 //
 151                 // so we can just subtract away the mean difference here.
 152                 mean_diff = (warped_sum - template_sum) * (1.0 / float(patch_size * patch_size));
 153                 du -= grad_sum * mean_diff;
 154
 155                 if (i == 0) {
 156                         first_mean_diff = mean_diff;
 157                 }
 158
 159                 // Do the actual update.
 160                 u -= H_inv * du;
 161         }
 162
 163         // Reject if we moved too far. Note that the paper says “too far” is the
 164         // patch size, but the DIS code uses half of a patch size. The latter seems
 165         // to give much better overall results.
 166         //
 167         // Also reject if the patch goes out-of-bounds (the paper does not mention this,
 168         // but the code does, and it seems to be critical to avoid really bad behavior
 169         // at the edges).
 170         vec2 patch_center = (base * image_size - 0.5f) + patch_size * 0.5f + u;
 171         if (length(u - initial_u) > (patch_size * 0.5f) ||
 172             patch_center.x < -(patch_size * 0.5f) ||
 173             image_size.x - patch_center.x < -(patch_size * 0.5f) ||
 174             patch_center.y < -(patch_size * 0.5f) ||
 175             image_size.y - patch_center.y < -(patch_size * 0.5f)) {
 176                 u = initial_u;
 177                 mean_diff = first_mean_diff;
 178         }
 179
 180         // NOTE: The mean patch diff will be for the second-to-last patch,
 181         // not the true position of du. But hopefully, it will be very close.
 182         u *= inv_image_size;
 183         out_flow = vec3(u.x, u.y, mean_diff);
 184 }