X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=derivatives.frag;h=0e2fd687c9cd483ba2f33d5ca40f40046c0eea80;hb=3795723be95f2fe82f3c8b8b45b1a905b2c811fd;hp=f5394c37a3f26031ca54d4244824568ae6477af6;hpb=e08fc3634afa41320df5dea0ec44feb3d4e4e679;p=nageru

diff --git a/derivatives.frag b/derivatives.frag
index f5394c3..0e2fd68 100644
--- a/derivatives.frag
+++ b/derivatives.frag
@@ -1,23 +1,32 @@
 #version 450 core
 
-in vec2 tc;
+in vec3 tc;
 out vec2 derivatives;
+out float beta_0;
 
-uniform sampler2D tex;
-uniform vec2 inv_image_size;
+uniform sampler2DArray tex;
 
 void main()
 {
-	float x_m2 = texture(tex, vec2(tc.x - 2.0 * inv_image_size.x), tc.y).x;
-	float x_m1 = texture(tex, vec2(tc.x -       inv_image_size.x), tc.y).x;
-	float x_p1 = texture(tex, vec2(tc.x +       inv_image_size.x), tc.y).x;
-	float x_p2 = texture(tex, vec2(tc.x + 2.0 * inv_image_size.x), tc.y).x;
+	float x_m2 = textureOffset(tex, tc, ivec2(-2,  0)).x;
+	float x_m1 = textureOffset(tex, tc, ivec2(-1,  0)).x;
+	float x_p1 = textureOffset(tex, tc, ivec2( 1,  0)).x;
+	float x_p2 = textureOffset(tex, tc, ivec2( 2,  0)).x;
 
-	float y_m2 = texture(tex, vec2(tc.x, tc.y - 2.0 * inv_image_size.y)).x;
-	float y_m1 = texture(tex, vec2(tc.x, tc.y -       inv_image_size.y)).x;
-	float y_p1 = texture(tex, vec2(tc.x, tc.y +       inv_image_size.y)).x;
-	float y_p2 = texture(tex, vec2(tc.x, tc.y + 2.0 * inv_image_size.y)).x;
+	float y_m2 = textureOffset(tex, tc, ivec2( 0, -2)).x;
+	float y_m1 = textureOffset(tex, tc, ivec2( 0, -1)).x;
+	float y_p1 = textureOffset(tex, tc, ivec2( 0,  1)).x;
+	float y_p2 = textureOffset(tex, tc, ivec2( 0,  2)).x;
 
 	derivatives.x = (x_p1 - x_m1) * (2.0/3.0) + (x_m2 - x_p2) * (1.0/12.0);
 	derivatives.y = (y_p1 - y_m1) * (2.0/3.0) + (y_m2 - y_p2) * (1.0/12.0);
+
+	// The nudge term in the square root in the DeepFlow paper is ζ² = 0.1² = 0.01.
+	// But this is assuming a 0..255 level. Given the nonlinearities in the expression
+	// where β_0 appears, there's no 100% equivalent way to adjust this
+	// constant that I can see, but taking it to (0.1/255)² ~= 1.53e-7 ~=
+	// 1e-7 ought to be good enough. I guess the basic idea is that it
+	// will only matter for near-zero derivatives anyway. I am a tiny
+	// bit worried about fp16 precision when storing these numbers, but OK.
+	beta_0 = 1.0 / (derivatives.x * derivatives.x + derivatives.y * derivatives.y + 1e-7);
 }