From: Steinar H. Gunderson <sgunderson@bigfoot.com>
Date: Fri, 20 Jul 2018 08:33:56 +0000 (+0200)
Subject: Implement SOR.
X-Git-Tag: 1.8.0~76^2~221
X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=1d38749aace96010e785cd49b7f93b89e4c927bb

Implement SOR.
---

diff --git a/equations.frag b/equations.frag
index 4509e34..3926351 100644
--- a/equations.frag
+++ b/equations.frag
@@ -1,7 +1,7 @@
 #version 450 core
 
 in vec2 tc;
-out vec4 equation;
+out uvec4 equation;
 
 uniform sampler2D I_x_y_tex, I_t_tex;
 uniform sampler2D diff_flow_tex, flow_tex;
@@ -97,9 +97,9 @@ void main()
 	// E_S term, sans the part on the right-hand side that deals with
 	// the neighboring pixels.
 	// TODO: Multiply by some gamma.
-	float smooth_l = textureOffset(smoothness_x_tex, tc, ivec2(-1, 0)).x;
+	float smooth_l = textureOffset(smoothness_x_tex, tc, ivec2(-1,  0)).x;
 	float smooth_r = texture(smoothness_x_tex, tc).x;
-	float smooth_d = textureOffset(smoothness_y_tex, tc, ivec2(-1, 0)).x;
+	float smooth_d = textureOffset(smoothness_y_tex, tc, ivec2( 0, -1)).x;
 	float smooth_u = texture(smoothness_y_tex, tc).x;
 	A11 -= smooth_l + smooth_r + smooth_d + smooth_u;
 	A22 -= smooth_l + smooth_r + smooth_d + smooth_u;
@@ -120,8 +120,8 @@ void main()
 	b2 += central.y;
 
 	// Encode the equation down into four uint32s.
-	equation.x = floatBitsToUint(A11);
+	equation.x = floatBitsToUint(1.0 / A11);
 	equation.y = floatBitsToUint(A12);
-	equation.z = floatBitsToUint(A22);
+	equation.z = floatBitsToUint(1.0 / A22);
 	equation.w = packHalf2x16(vec2(b1, b2));
 }
diff --git a/flow.cpp b/flow.cpp
index bd94cba..177a140 100644
--- a/flow.cpp
+++ b/flow.cpp
@@ -33,7 +33,7 @@ constexpr unsigned finest_level = 1;
 constexpr unsigned patch_size_pixels = 12;
 
 // Some global OpenGL objects.
-GLuint nearest_sampler, linear_sampler;
+GLuint nearest_sampler, linear_sampler, smoothness_sampler;
 GLuint vertex_vbo;
 
 string read_file(const string &filename)
@@ -597,10 +597,11 @@ void ComputeSmoothness::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoot
 // Set up the equations set (two equations in two unknowns, per pixel).
 // We store five floats; the three non-redundant elements of the 2x2 matrix (A)
 // as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit
-// floats. This fits into four u32 values; R, G, B for the matrix (the last
-// element is symmetric) and A for the two b values. All the values of the
-// energy term (E_I, E_G, E_S), except the smoothness terms that depend on
-// other pixels, are calculated in one pass.
+// floats. (Actually, we store the inverse of the diagonal elements, because
+// we only ever need to divide by them.) This fits into four u32 values;
+// R, G, B for the matrix (the last element is symmetric) and A for the two b values.
+// All the values of the energy term (E_I, E_G, E_S), except the smoothness
+// terms that depend on other pixels, are calculated in one pass.
 //
 // See variational_refinement.txt for more information.
 class SetupEquations {
@@ -609,8 +610,6 @@ public:
 	void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, GLuint equation_tex, int level_width, int level_height);
 
 private:
-	GLuint smoothness_sampler;
-
 	GLuint equations_vs_obj;
 	GLuint equations_fs_obj;
 	GLuint equations_program;
@@ -625,16 +624,6 @@ private:
 
 SetupEquations::SetupEquations()
 {
-	// The smoothness is sampled so that once we get to a smoothness involving
-	// a value outside the border, the diffusivity between the two becomes zero.
-	glCreateSamplers(1, &smoothness_sampler);
-	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
-	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
-	float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
-	glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
-
 	equations_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
 	equations_fs_obj = compile_shader(read_file("equations.frag"), GL_FRAGMENT_SHADER);
 	equations_program = link_program(equations_vs_obj, equations_fs_obj);
@@ -680,6 +669,74 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex
 	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
+// Calculate the smoothness constraints between neighboring pixels;
+// s_x(x,y) stores smoothness between pixel (x,y) and (x+1,y),
+// and s_y(x,y) stores between (x,y) and (x,y+1). We'll sample with
+// border color (0,0) later, so that there's zero diffusion out of
+// the border.
+//
+// See variational_refinement.txt for more information.
+class SOR {
+public:
+	SOR();
+	void exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations);
+
+private:
+	GLuint sor_vs_obj;
+	GLuint sor_fs_obj;
+	GLuint sor_program;
+	GLuint sor_vao;
+
+	GLuint uniform_diff_flow_tex;
+	GLuint uniform_equation_tex;
+	GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+};
+
+SOR::SOR()
+{
+	sor_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER);
+	sor_program = link_program(sor_vs_obj, sor_fs_obj);
+
+	// Set up the VAO containing all the required position/texcoord data.
+	glCreateVertexArrays(1, &sor_vao);
+	glBindVertexArray(sor_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	GLint position_attrib = glGetAttribLocation(sor_program, "position");
+	glEnableVertexArrayAttrib(sor_vao, position_attrib);
+	glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+	uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
+	uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex");
+}
+
+void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
+{
+	glUseProgram(sor_program);
+
+	bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+	bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
+	bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+	bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
+
+	GLuint sor_fbo;  // TODO: cleanup
+	glCreateFramebuffers(1, &sor_fbo);
+	glNamedFramebufferTexture(sor_fbo, GL_COLOR_ATTACHMENT0, diff_flow_tex, 0);  // NOTE: Bind to same as we render from!
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(sor_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, sor_fbo);
+
+	for (int i = 0; i < num_iterations; ++i) {
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		if (i != num_iterations - 1) {
+			glTextureBarrier();
+		}
+	}
+}
+
 int main(void)
 {
 	if (SDL_Init(SDL_INIT_EVERYTHING) == -1) {
@@ -720,6 +777,16 @@ int main(void)
 	glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 	glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 
+	// The smoothness is sampled so that once we get to a smoothness involving
+	// a value outside the border, the diffusivity between the two becomes zero.
+	glCreateSamplers(1, &smoothness_sampler);
+	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+	glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+	float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+	glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+
 	float vertices[] = {
 		0.0f, 1.0f,
 		0.0f, 0.0f,
@@ -744,6 +811,7 @@ int main(void)
 	Derivatives derivatives;
 	ComputeSmoothness compute_smoothness;
 	SetupEquations setup_equations;
+	SOR sor;
 
 	GLuint query;
 	glGenQueries(1, &query);
@@ -845,6 +913,10 @@ int main(void)
 
 			// Set up the 2x2 equation system for each pixel.
 			setup_equations.exec(I_x_y_tex, I_t_tex, du_dv_tex, dense_flow_tex, beta_0_tex, smoothness_x_tex, smoothness_y_tex, equation_tex, level_width, level_height);
+
+			// Run a few SOR (or quasi-SOR, since we're not really Jacobi) iterations.
+			// Note that these are to/from the same texture.
+			sor.exec(du_dv_tex, equation_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, 5);
 		}
 
 		prev_level_flow_tex = dense_flow_tex;
diff --git a/sor.frag b/sor.frag
new file mode 100644
index 0000000..f6ac2ee
--- /dev/null
+++ b/sor.frag
@@ -0,0 +1,39 @@
+#version 450 core
+
+in vec2 tc;
+out vec2 diff_flow;
+
+uniform sampler2D diff_flow_tex, smoothness_x_tex, smoothness_y_tex;
+uniform usampler2D equation_tex;
+
+void main()
+{
+	uvec4 equation = texture(equation_tex, tc);
+	float inv_A11 = uintBitsToFloat(equation.x);
+	float A12 = uintBitsToFloat(equation.y);
+	float inv_A22 = uintBitsToFloat(equation.z);
+	vec2 b = unpackHalf2x16(equation.w);
+
+	// Subtract the missing terms from the right-hand side
+	// (it couldn't be done earlier, because we didn't know
+	// the values of the neighboring pixels; they change for
+	// each SOR iteration).
+	// TODO: Multiply by some gamma.
+	float smooth_l = textureOffset(smoothness_x_tex, tc, ivec2(-1,  0)).x;
+	float smooth_r = texture(smoothness_x_tex, tc).x;
+	float smooth_d = textureOffset(smoothness_y_tex, tc, ivec2( 0, -1)).x;
+	float smooth_u = texture(smoothness_y_tex, tc).x;
+	b -= smooth_l * textureOffset(diff_flow_tex, tc, ivec2(-1,  0)).xy;
+	b -= smooth_r * textureOffset(diff_flow_tex, tc, ivec2( 1,  0)).xy;
+	b -= smooth_d * textureOffset(diff_flow_tex, tc, ivec2( 0, -1)).xy;
+	b -= smooth_u * textureOffset(diff_flow_tex, tc, ivec2( 0,  1)).xy;
+
+	const float omega = 1.6;
+	diff_flow = texture(diff_flow_tex, tc).xy;
+
+	// From https://en.wikipedia.org/wiki/Successive_over-relaxation.
+	float sigma_u = A12 * diff_flow.y;
+	diff_flow.x += omega * ((b.x - sigma_u) * inv_A11 - diff_flow.x);
+	float sigma_v = A12 * diff_flow.x;
+	diff_flow.y += omega * ((b.y - sigma_v) * inv_A22 - diff_flow.y);
+}