+ // Lock patch_bottom_left_texel to an integer, so that we never get
+ // any bilinear artifacts for the gradient.
+ vec2 base = round(patch_bottom_left_texel * vec2(image_width, image_height))
+ * vec2(inv_image_width, inv_image_height);
+
+ // First, precompute the pseudo-Hessian for the template patch.
+ // This is the part where we really save by the inverse search
+ // (ie., we can compute it up-front instead of anew for each
+ // patch).
+ //
+ // H = sum(S^T S)
+ //
+ // where S is the gradient at each point in the patch. Note that
+ // this is an outer product, so we get a (symmetric) 2x2 matrix,
+ // not a scalar.
+ mat2 H = mat2(0.0f);
+ for (uint y = 0; y < patch_size; ++y) {
+ for (uint x = 0; x < patch_size; ++x) {
+ vec2 tc;
+ tc.x = base.x + x * inv_image_width;
+ tc.y = base.y + y * inv_image_height;
+ vec2 grad = texture(grad0_tex, tc).xy;
+ H[0][0] += grad.x * grad.x;
+ H[1][1] += grad.y * grad.y;
+ H[0][1] += grad.x * grad.y;
+ }
+ }
+ H[1][0] = H[0][1];
+
+ // Make sure we don't get a singular matrix even if e.g. the picture is
+ // all black. (The paper doesn't mention this, but the reference code
+ // does it, and it seems like a reasonable hack to avoid NaNs. With such
+ // a H, we'll go out-of-bounds pretty soon, though.)
+ if (determinant(H) < 1e-6) {
+ H[0][0] += 1e-6;
+ H[1][1] += 1e-6;
+ }
+
+ mat2 H_inv = inverse(H);
+
+ // Fetch the initial guess for the flow.
+ vec2 initial_u = texture(flow_tex, flow_tc).xy;
+ vec2 u = initial_u;
+
+ for (uint i = 0; i < num_iterations; ++i) {
+ vec2 du = vec2(0.0, 0.0);
+ for (uint y = 0; y < patch_size; ++y) {
+ for (uint x = 0; x < patch_size; ++x) {
+ vec2 tc;
+ tc.x = base.x + x * inv_image_width;
+ tc.y = base.y + y * inv_image_height;
+ vec2 grad = texture(grad0_tex, tc).xy;
+ float t = texture(image0_tex, tc).x;
+ float warped = texture(image1_tex, tc + u).x;
+ du += grad * (warped - t);
+ }
+ }
+ u += H_inv * du * vec2(inv_image_width, inv_image_height);
+ }
+
+ // TODO: reject if moving too far
+
+ out_flow = u;