X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=motion_search.frag;h=163db2cbaf4be29dc74d749e07ce4037dda9b9f6;hb=1622572018b35982441b53b93c78cf6610fb1799;hp=d9d1f4eb4997c7650fb0f4902acf2c42b5ff4e4c;hpb=bc86ab33f50d1bb42240d495051c38d1e962348e;p=nageru

diff --git a/motion_search.frag b/motion_search.frag
index d9d1f4e..163db2c 100644
--- a/motion_search.frag
+++ b/motion_search.frag
@@ -40,16 +40,16 @@ const uint num_iterations = 16;
 
 in vec2 flow_tc;
 in vec2 patch_bottom_left_texel;  // Center of bottom-left texel of patch.
-out vec2 out_flow;
+out vec3 out_flow;
 
 uniform sampler2D flow_tex, grad0_tex, image0_tex, image1_tex;
-uniform vec2 image_size, inv_image_size;
+uniform vec2 image_size, inv_image_size, inv_prev_level_size;
 
 void main()
 {
 	// Lock patch_bottom_left_texel to an integer, so that we never get
 	// any bilinear artifacts for the gradient.
-	vec2 base = round(patch_bottom_left_texel * image_size)
+	vec2 base = (round(patch_bottom_left_texel * image_size - vec2(0.5, 0.5)) + vec2(0.5, 0.5))
 		* inv_image_size;
 
 	// First, precompute the pseudo-Hessian for the template patch.
@@ -90,21 +90,14 @@ void main()
 
 	mat2 H_inv = inverse(H);
 
-	// Fetch the initial guess for the flow. (We need the normalization step
-	// because densification works by accumulating; see the comments on the
-	// Densify class.)
-	vec3 prev_flow = texture(flow_tex, flow_tc).xyz;
-	vec2 initial_u;
-	if (prev_flow.z < 1e-3) {
-		initial_u = vec2(0.0, 0.0);
-	} else {
-		initial_u = prev_flow.xy / prev_flow.z;
-	}
+	// Fetch the initial guess for the flow.
+	vec2 initial_u = texture(flow_tex, flow_tc).xy * inv_prev_level_size;
 
 	// Note: The flow is in OpenGL coordinates [0..1], but the calculations
 	// generally come out in pixels since the gradient is in pixels,
 	// so we need to convert at the end.
 	vec2 u = initial_u;
+	float mean_diff, first_mean_diff;
 
 	for (uint i = 0; i < num_iterations; ++i) {
 		vec2 du = vec2(0.0, 0.0);
@@ -131,15 +124,30 @@ void main()
 		//   sum(S^T * (x - y)) = [what we calculated] - (µ1 - µ2) sum(S^T)
 		//
 		// so we can just subtract away the mean difference here.
-		du -= grad_sum * (warped_sum - template_sum) * (1.0 / (patch_size * patch_size));
+		mean_diff = (warped_sum - template_sum) * (1.0 / (patch_size * patch_size));
+		du -= grad_sum * mean_diff;
+
+		if (i == 0) {
+			first_mean_diff = mean_diff;
+		}
 
-		u += (H_inv * du) * inv_image_size;
+		// Do the actual update.
+		u -= (H_inv * du) * inv_image_size;
 	}
 
-	// Reject if we moved too far.
-	if (length((u - initial_u) * image_size) > patch_size) {
+	// Reject if we moved too far. Also reject if the patch goes out-of-bounds
+	// (the paper does not mention this, but the code does, and it seems to be
+	// critical to avoid really bad behavior at the edges).
+	if ((length((u - initial_u) * image_size) > patch_size) ||
+	     u.x * image_size.x < -(patch_size * 0.5f) ||
+	     (1.0 - u.x) * image_size.x < -(patch_size * 0.5f) ||
+	     u.y * image_size.y < -(patch_size * 0.5f) ||
+	     (1.0 - u.y) * image_size.y < -(patch_size * 0.5f)) {
 		u = initial_u;
+		mean_diff = first_mean_diff;
 	}
 
-	out_flow = u;
+	// NOTE: The mean patch diff will be for the second-to-last patch position,
+	// not the final position given by u. But hopefully, it will be very close.
+	out_flow = vec3(u.x, u.y, mean_diff);
 }