From 6d4c94c8a49e5637580c0b5ba30f97e34e5d2ff5 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 4 Aug 2018 22:35:43 +0200 Subject: [PATCH] Fix a bug where the first black pass of SOR would read junk data. --- flow.cpp | 17 ++++++++++++----- sor.frag | 11 ++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/flow.cpp b/flow.cpp index 7658942..2ed5bb8 100644 --- a/flow.cpp +++ b/flow.cpp @@ -764,7 +764,7 @@ private: GLuint uniform_diff_flow_tex; GLuint uniform_equation_red_tex, uniform_equation_black_tex; GLuint uniform_diffusivity_tex; - GLuint uniform_phase, uniform_zero_diff_flow; + GLuint uniform_phase, uniform_num_nonzero_phases; }; SOR::SOR() @@ -778,7 +778,7 @@ SOR::SOR() uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex"); uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex"); uniform_phase = glGetUniformLocation(sor_program, "phase"); - uniform_zero_diff_flow = glGetUniformLocation(sor_program, "zero_diff_flow"); + uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases"); } void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer) @@ -790,7 +790,9 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler); bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler); - glProgramUniform1i(sor_program, uniform_zero_diff_flow, zero_diff_flow); + if (!zero_diff_flow) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } // NOTE: We bind to the texture we are rendering from, but we never write any value // that we read in the same shader pass (we call discard for red values when we compute @@ -803,6 +805,9 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl for (int i = 0; i < num_iterations; ++i) { { ScopedTimer timer("Red pass", sor_timer); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0); + } glProgramUniform1i(sor_program, uniform_phase, 0); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glTextureBarrier(); @@ -810,11 +815,13 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl { ScopedTimer timer("Black pass", sor_timer); if (zero_diff_flow && i == 0) { - // Not zero anymore. - glProgramUniform1i(sor_program, uniform_zero_diff_flow, 0); + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1); } glProgramUniform1i(sor_program, uniform_phase, 1); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } if (i != num_iterations - 1) { glTextureBarrier(); } diff --git a/sor.frag b/sor.frag index ef431d3..b072839 100644 --- a/sor.frag +++ b/sor.frag @@ -9,7 +9,7 @@ uniform sampler2D diff_flow_tex, diffusivity_tex; uniform usampler2D equation_red_tex, equation_black_tex; uniform int phase; -uniform bool zero_diff_flow; +uniform int num_nonzero_phases; // See pack_floats_shared() in equations.frag. vec2 unpack_floats_shared(uint c) @@ -70,7 +70,7 @@ void main() const float omega = 1.8; // Marginally better than 1.6, it seems. - if (zero_diff_flow) { + if (num_nonzero_phases == 0) { // Simplified version of the code below, assuming diff_flow == 0.0f everywhere. diff_flow.x = omega * b.x * inv_A11; diff_flow.y = omega * b.y * inv_A22; @@ -87,7 +87,12 @@ void main() b += smooth_r * textureOffset(diff_flow_tex, tc, ivec2( 1, 0)).xy; b += smooth_d * textureOffset(diff_flow_tex, tc, ivec2( 0, -1)).xy; b += smooth_u * textureOffset(diff_flow_tex, tc, ivec2( 0, 1)).xy; - diff_flow = texture(diff_flow_tex, tc).xy; + + if (num_nonzero_phases == 1) { + diff_flow = vec2(0.0f); + } else { + diff_flow = texture(diff_flow_tex, tc).xy; + } // From https://en.wikipedia.org/wiki/Successive_over-relaxation. float sigma_u = A12 * diff_flow.y; -- 2.39.2