X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=flow.cpp;h=4fa779473a400a3baa5e9c6c1cfd0a26f9c174cc;hb=d88cb2ee9ddf6dff5428fd593afddf1384b185e8;hp=b5656cb10766cdf4ee6cb6ce6b06c20a195fb799;hpb=c20ac3210a3eb12531939afb32a232ba9c68a0b5;p=nageru diff --git a/flow.cpp b/flow.cpp index b5656cb..4fa7794 100644 --- a/flow.cpp +++ b/flow.cpp @@ -48,6 +48,8 @@ float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f; bool enable_timing = true; bool detailed_timing = false; +bool enable_warmup = false; +bool in_warmup = false; bool enable_variational_refinement = true; // Just for debugging. bool enable_interpolation = false; @@ -764,7 +766,7 @@ private: GLuint uniform_diff_flow_tex; GLuint uniform_equation_red_tex, uniform_equation_black_tex; GLuint uniform_diffusivity_tex; - GLuint uniform_phase, uniform_zero_diff_flow; + GLuint uniform_phase, uniform_num_nonzero_phases; }; SOR::SOR() @@ -778,7 +780,7 @@ SOR::SOR() uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex"); uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex"); uniform_phase = glGetUniformLocation(sor_program, "phase"); - uniform_zero_diff_flow = glGetUniformLocation(sor_program, "zero_diff_flow"); + uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases"); } void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer) @@ -790,7 +792,9 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler); bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler); - glProgramUniform1i(sor_program, uniform_zero_diff_flow, zero_diff_flow); + if (!zero_diff_flow) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } // NOTE: We bind to the texture we are rendering from, but we never write any value // that we read in the same shader pass (we call discard for red values when we compute @@ -803,6 +807,9 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl for (int i = 0; i < num_iterations; ++i) { { ScopedTimer timer("Red pass", sor_timer); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0); + } glProgramUniform1i(sor_program, uniform_phase, 0); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glTextureBarrier(); @@ -810,11 +817,13 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl { ScopedTimer timer("Black pass", sor_timer); if (zero_diff_flow && i == 0) { - // Not zero anymore. - glProgramUniform1i(sor_program, uniform_zero_diff_flow, 0); + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1); } glProgramUniform1i(sor_program, uniform_phase, 1); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } if (i != num_iterations - 1) { glTextureBarrier(); } @@ -1114,7 +1123,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // We need somewhere to store du and dv (the flow increment, relative // to the non-refined base flow u0 and v0). It's initially garbage, // but not read until we've written something sane to it. - GLuint du_dv_tex = pool.get_texture(GL_RG16F, level_width, level_height); + GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height); // And for diffusivity. GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height); @@ -1128,19 +1137,19 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // Calculate the diffusivity term for each pixel. { ScopedTimer timer("Compute diffusivity", &varref_timer); - compute_diffusivity.exec(base_flow_tex, du_dv_tex, diffusivity_tex, level_width, level_height, outer_idx == 0); + compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0); } // Set up the 2x2 equation system for each pixel. { ScopedTimer timer("Set up equations", &varref_timer); - setup_equations.exec(I_x_y_tex, I_t_tex, du_dv_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0); + setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0); } // Run a few SOR iterations. Note that these are to/from the same texture. { ScopedTimer timer("SOR", &varref_timer); - sor.exec(du_dv_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, &timer); + sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, &timer); } } @@ -1159,9 +1168,9 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // it is more efficient), but it helps debug the motion search. if (enable_variational_refinement) { ScopedTimer timer("Add differential flow", &varref_timer); - add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height); + add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height); } - pool.release_texture(du_dv_tex); + pool.release_texture(diff_flow_tex); if (prev_level_flow_tex != initial_flow_tex) { pool.release_texture(prev_level_flow_tex); @@ -1172,7 +1181,9 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra } total_timer.end(); - timers.print(); + if (!in_warmup) { + timers.print(); + } // Scale up the flow to the final size (if needed). if (finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) { @@ -1555,7 +1566,9 @@ GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLui } pool.release_texture(flow_tex); total_timer.end(); - timers.print(); + if (!in_warmup) { + timers.print(); + } return output_tex; } @@ -1760,6 +1773,16 @@ void compute_flow_only(int argc, char **argv, int optind) glGenerateTextureMipmap(tex1_gray); DISComputeFlow compute_flow(width1, height1); + + if (enable_warmup) { + in_warmup = true; + for (int i = 0; i < 10; ++i) { + GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE); + compute_flow.release_texture(final_tex); + } + in_warmup = false; + } + GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE); schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm"); @@ -1854,6 +1877,19 @@ void interpolate_image(int argc, char **argv, int optind) gray.exec(tex1, tex1_gray, width1, height1); glGenerateTextureMipmap(tex1_gray); + if (enable_warmup) { + in_warmup = true; + for (int i = 0; i < 10; ++i) { + GLuint forward_flow_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); + GLuint backward_flow_tex = compute_flow.exec(tex1_gray, tex0_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); + GLuint interpolated_tex = interpolate.exec(tex0, tex1, forward_flow_tex, backward_flow_tex, width1, height1, 0.5f); + compute_flow.release_texture(forward_flow_tex); + compute_flow.release_texture(backward_flow_tex); + interpolate.release_texture(interpolated_tex); + } + in_warmup = false; + } + GLuint forward_flow_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); GLuint backward_flow_tex = compute_flow.exec(tex1_gray, tex0_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); @@ -1882,7 +1918,8 @@ int main(int argc, char **argv) { "disable-timing", no_argument, 0, 1000 }, { "detailed-timing", no_argument, 0, 1003 }, { "ignore-variational-refinement", no_argument, 0, 1001 }, // Still calculates it, just doesn't apply it. - { "interpolate", no_argument, 0, 1002 } + { "interpolate", no_argument, 0, 1002 }, + { "warmup", no_argument, 0, 1004 } }; for ( ;; ) { @@ -1914,6 +1951,9 @@ int main(int argc, char **argv) case 1003: detailed_timing = true; break; + case 1004: + enable_warmup = true; + break; default: fprintf(stderr, "Unknown option '%s'\n", argv[option_index]); exit(1);