X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=flow.cpp;h=4fa779473a400a3baa5e9c6c1cfd0a26f9c174cc;hb=d88cb2ee9ddf6dff5428fd593afddf1384b185e8;hp=d914d688fa550cdcaf2ad0341992547018c81ffd;hpb=a5204594f8ed09c1bd91c24481dc96e5a3000520;p=nageru diff --git a/flow.cpp b/flow.cpp index d914d68..4fa7794 100644 --- a/flow.cpp +++ b/flow.cpp @@ -48,6 +48,8 @@ float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f; bool enable_timing = true; bool detailed_timing = false; +bool enable_warmup = false; +bool in_warmup = false; bool enable_variational_refinement = true; // Just for debugging. bool enable_interpolation = false; @@ -222,32 +224,6 @@ GLuint link_program(GLuint vs_obj, GLuint fs_obj) return program; } -GLuint generate_vbo(GLint size, GLsizeiptr data_size, const GLvoid *data) -{ - GLuint vbo; - glCreateBuffers(1, &vbo); - glBufferData(GL_ARRAY_BUFFER, data_size, data, GL_STATIC_DRAW); - glNamedBufferData(vbo, data_size, data, GL_STATIC_DRAW); - return vbo; -} - -GLuint fill_vertex_attribute(GLuint vao, GLuint glsl_program_num, const string &attribute_name, GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data) -{ - int attrib = glGetAttribLocation(glsl_program_num, attribute_name.c_str()); - if (attrib == -1) { - return -1; - } - - GLuint vbo = generate_vbo(size, data_size, data); - - glBindBuffer(GL_ARRAY_BUFFER, vbo); - glEnableVertexArrayAttrib(vao, attrib); - glVertexAttribPointer(attrib, size, type, GL_FALSE, 0, BUFFER_OFFSET(0)); - glBindBuffer(GL_ARRAY_BUFFER, 0); - - return vbo; -} - void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler) { if (location == -1) { @@ -405,7 +381,6 @@ void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height) glViewport(0, 0, width, height); fbos.render_to(gray_tex); glBindVertexArray(gray_vao); - glUseProgram(gray_program); glDisable(GL_BLEND); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -428,7 +403,6 @@ private: GLuint sobel_vs_obj; GLuint sobel_fs_obj; GLuint sobel_program; - GLuint sobel_vao; GLuint uniform_tex; }; @@ -439,14 +413,6 @@ Sobel::Sobel() sobel_fs_obj = compile_shader(read_file("sobel.frag"), GL_FRAGMENT_SHADER); sobel_program = link_program(sobel_vs_obj, sobel_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &sobel_vao); - glBindVertexArray(sobel_vao); - - GLint position_attrib = glGetAttribLocation(sobel_program, "position"); - glEnableVertexArrayAttrib(sobel_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_tex = glGetUniformLocation(sobel_program, "tex"); } @@ -457,8 +423,6 @@ void Sobel::exec(GLint tex0_view, GLint grad0_tex, int level_width, int level_he glViewport(0, 0, level_width, level_height); fbos.render_to(grad0_tex); - glBindVertexArray(sobel_vao); - glUseProgram(sobel_program); glDisable(GL_BLEND); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -475,10 +439,9 @@ private: GLuint motion_vs_obj; GLuint motion_fs_obj; GLuint motion_search_program; - GLuint motion_search_vao; GLuint uniform_inv_image_size, uniform_inv_prev_level_size; - GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex; + GLuint uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex; }; MotionSearch::MotionSearch() @@ -487,18 +450,8 @@ MotionSearch::MotionSearch() motion_fs_obj = compile_shader(read_file("motion_search.frag"), GL_FRAGMENT_SHADER); motion_search_program = link_program(motion_vs_obj, motion_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &motion_search_vao); - glBindVertexArray(motion_search_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(motion_search_program, "position"); - glEnableVertexArrayAttrib(motion_search_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size"); uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size"); - uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex"); uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex"); uniform_grad0_tex = glGetUniformLocation(motion_search_program, "grad0_tex"); uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex"); @@ -508,9 +461,8 @@ void MotionSearch::exec(GLuint tex0_view, GLuint tex1_view, GLuint grad0_tex, GL { glUseProgram(motion_search_program); - bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler); bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler); - bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler); + bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, nearest_sampler); bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler); glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height); @@ -518,8 +470,6 @@ void MotionSearch::exec(GLuint tex0_view, GLuint tex1_view, GLuint grad0_tex, GL glViewport(0, 0, width_patches, height_patches); fbos.render_to(flow_out_tex); - glBindVertexArray(motion_search_vao); - glUseProgram(motion_search_program); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -542,7 +492,6 @@ private: GLuint densify_vs_obj; GLuint densify_fs_obj; GLuint densify_program; - GLuint densify_vao; GLuint uniform_patch_size; GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex; @@ -554,15 +503,6 @@ Densify::Densify() densify_fs_obj = compile_shader(read_file("densify.frag"), GL_FRAGMENT_SHADER); densify_program = link_program(densify_vs_obj, densify_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &densify_vao); - glBindVertexArray(densify_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(densify_program, "position"); - glEnableVertexArrayAttrib(densify_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_patch_size = glGetUniformLocation(densify_program, "patch_size"); uniform_image0_tex = glGetUniformLocation(densify_program, "image0_tex"); uniform_image1_tex = glGetUniformLocation(densify_program, "image1_tex"); @@ -584,7 +524,6 @@ void Densify::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint d glViewport(0, 0, level_width, level_height); glEnable(GL_BLEND); glBlendFunc(GL_ONE, GL_ONE); - glBindVertexArray(densify_vao); fbos.render_to(dense_flow_tex); glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glClear(GL_COLOR_BUFFER_BIT); @@ -610,7 +549,6 @@ private: GLuint prewarp_vs_obj; GLuint prewarp_fs_obj; GLuint prewarp_program; - GLuint prewarp_vao; GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex; }; @@ -621,15 +559,6 @@ Prewarp::Prewarp() prewarp_fs_obj = compile_shader(read_file("prewarp.frag"), GL_FRAGMENT_SHADER); prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &prewarp_vao); - glBindVertexArray(prewarp_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(prewarp_program, "position"); - glEnableVertexArrayAttrib(prewarp_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_image0_tex = glGetUniformLocation(prewarp_program, "image0_tex"); uniform_image1_tex = glGetUniformLocation(prewarp_program, "image1_tex"); uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex"); @@ -645,7 +574,6 @@ void Prewarp::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I glViewport(0, 0, level_width, level_height); glDisable(GL_BLEND); - glBindVertexArray(prewarp_vao); fbos.render_to(I_tex, I_t_tex, normalized_flow_tex); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -669,7 +597,6 @@ private: GLuint derivatives_vs_obj; GLuint derivatives_fs_obj; GLuint derivatives_program; - GLuint derivatives_vao; GLuint uniform_tex; }; @@ -680,15 +607,6 @@ Derivatives::Derivatives() derivatives_fs_obj = compile_shader(read_file("derivatives.frag"), GL_FRAGMENT_SHADER); derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &derivatives_vao); - glBindVertexArray(derivatives_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(derivatives_program, "position"); - glEnableVertexArrayAttrib(derivatives_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_tex = glGetUniformLocation(derivatives_program, "tex"); } @@ -700,78 +618,58 @@ void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, in glViewport(0, 0, level_width, level_height); glDisable(GL_BLEND); - glBindVertexArray(derivatives_vao); fbos.render_to(I_x_y_tex, beta_0_tex); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -// Calculate the smoothness constraints between neighboring pixels; -// s_x(x,y) stores smoothness between pixel (x,y) and (x+1,y), -// and s_y(x,y) stores between (x,y) and (x,y+1). We'll sample with -// border color (0,0) later, so that there's zero diffusion out of -// the border. +// Calculate the diffusivity for each pixels, g(x,y). Smoothness (s) will +// be calculated in the shaders on-the-fly by sampling in-between two +// neighboring g(x,y) pixels, plus a border tweak to make sure we get +// zero smoothness at the border. // // See variational_refinement.txt for more information. -class ComputeSmoothness { +class ComputeDiffusivity { public: - ComputeSmoothness(); - void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, bool zero_diff_flow); + ComputeDiffusivity(); + void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow); private: - PersistentFBOSet<2> fbos; + PersistentFBOSet<1> fbos; - GLuint smoothness_vs_obj; - GLuint smoothness_fs_obj; - GLuint smoothness_program; - GLuint smoothness_vao; + GLuint diffusivity_vs_obj; + GLuint diffusivity_fs_obj; + GLuint diffusivity_program; GLuint uniform_flow_tex, uniform_diff_flow_tex; GLuint uniform_alpha, uniform_zero_diff_flow; }; -ComputeSmoothness::ComputeSmoothness() +ComputeDiffusivity::ComputeDiffusivity() { - smoothness_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - smoothness_fs_obj = compile_shader(read_file("smoothness.frag"), GL_FRAGMENT_SHADER); - smoothness_program = link_program(smoothness_vs_obj, smoothness_fs_obj); - - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &smoothness_vao); - glBindVertexArray(smoothness_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(smoothness_program, "position"); - glEnableVertexArrayAttrib(smoothness_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - - uniform_flow_tex = glGetUniformLocation(smoothness_program, "flow_tex"); - uniform_diff_flow_tex = glGetUniformLocation(smoothness_program, "diff_flow_tex"); - uniform_alpha = glGetUniformLocation(smoothness_program, "alpha"); - uniform_zero_diff_flow = glGetUniformLocation(smoothness_program, "zero_diff_flow"); + diffusivity_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); + diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag"), GL_FRAGMENT_SHADER); + diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj); + + uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex"); + uniform_diff_flow_tex = glGetUniformLocation(diffusivity_program, "diff_flow_tex"); + uniform_alpha = glGetUniformLocation(diffusivity_program, "alpha"); + uniform_zero_diff_flow = glGetUniformLocation(diffusivity_program, "zero_diff_flow"); } -void ComputeSmoothness::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, bool zero_diff_flow) +void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow) { - glUseProgram(smoothness_program); + glUseProgram(diffusivity_program); - bind_sampler(smoothness_program, uniform_flow_tex, 0, flow_tex, nearest_sampler); - bind_sampler(smoothness_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler); - glProgramUniform1f(smoothness_program, uniform_alpha, vr_alpha); - glProgramUniform1i(smoothness_program, uniform_zero_diff_flow, zero_diff_flow); + bind_sampler(diffusivity_program, uniform_flow_tex, 0, flow_tex, nearest_sampler); + bind_sampler(diffusivity_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler); + glProgramUniform1f(diffusivity_program, uniform_alpha, vr_alpha); + glProgramUniform1i(diffusivity_program, uniform_zero_diff_flow, zero_diff_flow); glViewport(0, 0, level_width, level_height); glDisable(GL_BLEND); - glBindVertexArray(smoothness_vao); - fbos.render_to(smoothness_x_tex, smoothness_y_tex); + fbos.render_to(diffusivity_tex); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - // Make sure the smoothness on the right and upper borders is zero. - // We could have done this by making (W-1)xH and Wx(H-1) textures instead - // (we're sampling smoothness with all-zero border color), but we'd - // have to adjust the sampling coordinates, which is annoying. - glClearTexSubImage(smoothness_x_tex, 0, level_width - 1, 0, 0, 1, level_height, 1, GL_RED, GL_FLOAT, nullptr); - glClearTexSubImage(smoothness_y_tex, 0, 0, level_height - 1, 0, level_width, 1, 1, GL_RED, GL_FLOAT, nullptr); } // Set up the equations set (two equations in two unknowns, per pixel). @@ -783,55 +681,53 @@ void ComputeSmoothness::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoot // All the values of the energy term (E_I, E_G, E_S), except the smoothness // terms that depend on other pixels, are calculated in one pass. // -// See variational_refinement.txt for more information. +// The equation set is split in two; one contains only the pixels needed for +// the red pass, and one only for the black pass (see sor.frag). This reduces +// the amount of data the SOR shader has to pull in, at the cost of some +// complexity when the equation texture ends up with half the size and we need +// to adjust texture coordinates. The contraction is done along the horizontal +// axis, so that on even rows (0, 2, 4, ...), the “red” texture will contain +// pixels 0, 2, 4, 6, etc., and on odd rows 1, 3, 5, etc.. +// +// See variational_refinement.txt for more information about the actual +// equations in use. class SetupEquations { public: SetupEquations(); - void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, GLuint equation_tex, int level_width, int level_height, bool zero_diff_flow); + void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow); private: - PersistentFBOSet<1> fbos; + PersistentFBOSet<2> fbos; GLuint equations_vs_obj; GLuint equations_fs_obj; GLuint equations_program; - GLuint equations_vao; GLuint uniform_I_x_y_tex, uniform_I_t_tex; GLuint uniform_diff_flow_tex, uniform_base_flow_tex; GLuint uniform_beta_0_tex; - GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex; + GLuint uniform_diffusivity_tex; GLuint uniform_gamma, uniform_delta, uniform_zero_diff_flow; }; SetupEquations::SetupEquations() { - equations_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); + equations_vs_obj = compile_shader(read_file("equations.vert"), GL_VERTEX_SHADER); equations_fs_obj = compile_shader(read_file("equations.frag"), GL_FRAGMENT_SHADER); equations_program = link_program(equations_vs_obj, equations_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &equations_vao); - glBindVertexArray(equations_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(equations_program, "position"); - glEnableVertexArrayAttrib(equations_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex"); uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex"); uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex"); uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex"); uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex"); - uniform_smoothness_x_tex = glGetUniformLocation(equations_program, "smoothness_x_tex"); - uniform_smoothness_y_tex = glGetUniformLocation(equations_program, "smoothness_y_tex"); + uniform_diffusivity_tex = glGetUniformLocation(equations_program, "diffusivity_tex"); uniform_gamma = glGetUniformLocation(equations_program, "gamma"); uniform_delta = glGetUniformLocation(equations_program, "delta"); uniform_zero_diff_flow = glGetUniformLocation(equations_program, "zero_diff_flow"); } -void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, GLuint equation_tex, int level_width, int level_height, bool zero_diff_flow) +void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow) { glUseProgram(equations_program); @@ -840,16 +736,14 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler); bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler); bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler); - bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, zero_border_sampler); - bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, zero_border_sampler); + bind_sampler(equations_program, uniform_diffusivity_tex, 5, diffusivity_tex, zero_border_sampler); glProgramUniform1f(equations_program, uniform_delta, vr_delta); glProgramUniform1f(equations_program, uniform_gamma, vr_gamma); glProgramUniform1i(equations_program, uniform_zero_diff_flow, zero_diff_flow); - glViewport(0, 0, level_width, level_height); + glViewport(0, 0, (level_width + 1) / 2, level_height); glDisable(GL_BLEND); - glBindVertexArray(equations_vao); - fbos.render_to(equation_tex); + fbos.render_to({equation_red_tex, equation_black_tex}); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -860,7 +754,7 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex class SOR { public: SOR(); - void exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer); + void exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer); private: PersistentFBOSet<1> fbos; @@ -868,12 +762,11 @@ private: GLuint sor_vs_obj; GLuint sor_fs_obj; GLuint sor_program; - GLuint sor_vao; GLuint uniform_diff_flow_tex; - GLuint uniform_equation_tex; - GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex; - GLuint uniform_phase, uniform_zero_diff_flow; + GLuint uniform_equation_red_tex, uniform_equation_black_tex; + GLuint uniform_diffusivity_tex; + GLuint uniform_phase, uniform_num_nonzero_phases; }; SOR::SOR() @@ -882,33 +775,26 @@ SOR::SOR() sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER); sor_program = link_program(sor_vs_obj, sor_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &sor_vao); - glBindVertexArray(sor_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(sor_program, "position"); - glEnableVertexArrayAttrib(sor_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex"); - uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex"); - uniform_smoothness_x_tex = glGetUniformLocation(sor_program, "smoothness_x_tex"); - uniform_smoothness_y_tex = glGetUniformLocation(sor_program, "smoothness_y_tex"); + uniform_equation_red_tex = glGetUniformLocation(sor_program, "equation_red_tex"); + uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex"); + uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex"); uniform_phase = glGetUniformLocation(sor_program, "phase"); - uniform_zero_diff_flow = glGetUniformLocation(sor_program, "zero_diff_flow"); + uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases"); } -void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer) +void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, ScopedTimer *sor_timer) { glUseProgram(sor_program); bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler); - bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, zero_border_sampler); - bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler); - bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler); + bind_sampler(sor_program, uniform_diffusivity_tex, 1, diffusivity_tex, zero_border_sampler); + bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler); + bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler); - glProgramUniform1i(sor_program, uniform_zero_diff_flow, zero_diff_flow); + if (!zero_diff_flow) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } // NOTE: We bind to the texture we are rendering from, but we never write any value // that we read in the same shader pass (we call discard for red values when we compute @@ -916,12 +802,14 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_te // as per the spec. glViewport(0, 0, level_width, level_height); glDisable(GL_BLEND); - glBindVertexArray(sor_vao); fbos.render_to(diff_flow_tex); for (int i = 0; i < num_iterations; ++i) { { ScopedTimer timer("Red pass", sor_timer); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0); + } glProgramUniform1i(sor_program, uniform_phase, 0); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glTextureBarrier(); @@ -929,11 +817,13 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_te { ScopedTimer timer("Black pass", sor_timer); if (zero_diff_flow && i == 0) { - // Not zero anymore. - glProgramUniform1i(sor_program, uniform_zero_diff_flow, 0); + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1); } glProgramUniform1i(sor_program, uniform_phase, 1); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + if (zero_diff_flow && i == 0) { + glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2); + } if (i != num_iterations - 1) { glTextureBarrier(); } @@ -954,7 +844,6 @@ private: GLuint add_flow_vs_obj; GLuint add_flow_fs_obj; GLuint add_flow_program; - GLuint add_flow_vao; GLuint uniform_diff_flow_tex; }; @@ -965,15 +854,6 @@ AddBaseFlow::AddBaseFlow() add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag"), GL_FRAGMENT_SHADER); add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &add_flow_vao); - glBindVertexArray(add_flow_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(add_flow_program, "position"); - glEnableVertexArrayAttrib(add_flow_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex"); } @@ -986,7 +866,6 @@ void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_wid glViewport(0, 0, level_width, level_height); glEnable(GL_BLEND); glBlendFunc(GL_ONE, GL_ONE); - glBindVertexArray(add_flow_vao); fbos.render_to(base_flow_tex); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -1004,7 +883,6 @@ private: GLuint resize_flow_vs_obj; GLuint resize_flow_fs_obj; GLuint resize_flow_program; - GLuint resize_flow_vao; GLuint uniform_flow_tex; GLuint uniform_scale_factor; @@ -1016,15 +894,6 @@ ResizeFlow::ResizeFlow() resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag"), GL_FRAGMENT_SHADER); resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &resize_flow_vao); - glBindVertexArray(resize_flow_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(resize_flow_program, "position"); - glEnableVertexArrayAttrib(resize_flow_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex"); uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor"); } @@ -1039,7 +908,6 @@ void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int inpu glViewport(0, 0, output_width, output_height); glDisable(GL_BLEND); - glBindVertexArray(resize_flow_vao); fbos.render_to(out_tex); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -1080,6 +948,7 @@ public: private: int width, height; GLuint initial_flow_tex; + GLuint vertex_vbo, vao; TexturePool pool; // The various passes. @@ -1088,7 +957,7 @@ private: Densify densify; Prewarp prewarp; Derivatives derivatives; - ComputeSmoothness compute_smoothness; + ComputeDiffusivity compute_diffusivity; SetupEquations setup_equations; SOR sor; AddBaseFlow add_base_flow; @@ -1116,17 +985,35 @@ DISComputeFlow::DISComputeFlow(int width, int height) // Similarly, gradients are zero outside the border, since the edge is taken // to be constant. glCreateSamplers(1, &zero_border_sampler); - glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f }; // Note that zero alpha means we can also see whether we sampled outside the border or not. glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero); // Initial flow is zero, 1x1. glCreateTextures(GL_TEXTURE_2D, 1, &initial_flow_tex); glTextureStorage2D(initial_flow_tex, 1, GL_RG16F, 1, 1); glClearTexImage(initial_flow_tex, 0, GL_RG, GL_FLOAT, nullptr); + + // Set up the vertex data that will be shared between all passes. + float vertices[] = { + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 1.0f, + 1.0f, 0.0f, + }; + glCreateBuffers(1, &vertex_vbo); + glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW); + + glCreateVertexArrays(1, &vao); + glBindVertexArray(vao); + glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); + + GLint position_attrib = 0; // Hard-coded in every vertex shader. + glEnableVertexArrayAttrib(vao, position_attrib); + glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); } GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_strategy) @@ -1136,6 +1023,8 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra GPUTimers timers; + glBindVertexArray(vao); + ScopedTimer total_timer("Total", &timers); for (int level = coarsest_level; level >= int(finest_level); --level) { char timer_name[256]; @@ -1164,7 +1053,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // Create a new texture; we could be fancy and render use a multi-level // texture, but meh. - GLuint grad0_tex = pool.get_texture(GL_RG16F, level_width, level_height); + GLuint grad0_tex = pool.get_texture(GL_R32UI, level_width, level_height); // Find the derivative. { @@ -1234,44 +1123,42 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // We need somewhere to store du and dv (the flow increment, relative // to the non-refined base flow u0 and v0). It's initially garbage, // but not read until we've written something sane to it. - GLuint du_dv_tex = pool.get_texture(GL_RG16F, level_width, level_height); + GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height); - // And for smoothness. - GLuint smoothness_x_tex = pool.get_texture(GL_R16F, level_width, level_height); - GLuint smoothness_y_tex = pool.get_texture(GL_R16F, level_width, level_height); + // And for diffusivity. + GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height); // And finally for the equation set. See SetupEquations for // the storage format. - GLuint equation_tex = pool.get_texture(GL_RGBA32UI, level_width, level_height); + GLuint equation_red_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height); + GLuint equation_black_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height); for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) { - // Calculate the smoothness terms between the neighboring pixels, - // both in x and y direction. + // Calculate the diffusivity term for each pixel. { - ScopedTimer timer("Compute smoothness", &varref_timer); - compute_smoothness.exec(base_flow_tex, du_dv_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, outer_idx == 0); + ScopedTimer timer("Compute diffusivity", &varref_timer); + compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0); } // Set up the 2x2 equation system for each pixel. { ScopedTimer timer("Set up equations", &varref_timer); - setup_equations.exec(I_x_y_tex, I_t_tex, du_dv_tex, base_flow_tex, beta_0_tex, smoothness_x_tex, smoothness_y_tex, equation_tex, level_width, level_height, outer_idx == 0); + setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0); } - // Run a few SOR (or quasi-SOR, since we're not really Jacobi) iterations. - // Note that these are to/from the same texture. + // Run a few SOR iterations. Note that these are to/from the same texture. { ScopedTimer timer("SOR", &varref_timer); - sor.exec(du_dv_tex, equation_tex, smoothness_x_tex, smoothness_y_tex, level_width, level_height, 5, outer_idx == 0, &timer); + sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, &timer); } } pool.release_texture(I_t_tex); pool.release_texture(I_x_y_tex); pool.release_texture(beta_0_tex); - pool.release_texture(smoothness_x_tex); - pool.release_texture(smoothness_y_tex); - pool.release_texture(equation_tex); + pool.release_texture(diffusivity_tex); + pool.release_texture(equation_red_tex); + pool.release_texture(equation_black_tex); // Add the differential flow found by the variational refinement to the base flow, // giving the final flow estimate for this level. @@ -1281,9 +1168,9 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra // it is more efficient), but it helps debug the motion search. if (enable_variational_refinement) { ScopedTimer timer("Add differential flow", &varref_timer); - add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height); + add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height); } - pool.release_texture(du_dv_tex); + pool.release_texture(diff_flow_tex); if (prev_level_flow_tex != initial_flow_tex) { pool.release_texture(prev_level_flow_tex); @@ -1294,7 +1181,9 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1, ResizeStrategy resize_stra } total_timer.end(); - timers.print(); + if (!in_warmup) { + timers.print(); + } // Scale up the flow to the final size (if needed). if (finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) { @@ -1322,7 +1211,6 @@ private: GLuint splat_vs_obj; GLuint splat_fs_obj; GLuint splat_program; - GLuint splat_vao; GLuint uniform_invert_flow, uniform_splat_size, uniform_alpha; GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex; @@ -1335,15 +1223,6 @@ Splat::Splat() splat_fs_obj = compile_shader(read_file("splat.frag"), GL_FRAGMENT_SHADER); splat_program = link_program(splat_vs_obj, splat_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &splat_vao); - glBindVertexArray(splat_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(splat_program, "position"); - glEnableVertexArrayAttrib(splat_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_invert_flow = glGetUniformLocation(splat_program, "invert_flow"); uniform_splat_size = glGetUniformLocation(splat_program, "splat_size"); uniform_alpha = glGetUniformLocation(splat_program, "alpha"); @@ -1373,7 +1252,6 @@ void Splat::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backw glDisable(GL_BLEND); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LESS); // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.) - glBindVertexArray(splat_vao); fbos.render_to(depth_tex, flow_tex); @@ -1424,7 +1302,6 @@ private: GLuint fill_vs_obj; GLuint fill_fs_obj; GLuint fill_program; - GLuint fill_vao; GLuint uniform_tex; GLuint uniform_z, uniform_sample_offset; @@ -1436,15 +1313,6 @@ HoleFill::HoleFill() fill_fs_obj = compile_shader(read_file("hole_fill.frag"), GL_FRAGMENT_SHADER); fill_program = link_program(fill_vs_obj, fill_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &fill_vao); - glBindVertexArray(fill_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(fill_program, "position"); - glEnableVertexArrayAttrib(fill_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_tex = glGetUniformLocation(fill_program, "tex"); uniform_z = glGetUniformLocation(fill_program, "z"); uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset"); @@ -1462,7 +1330,6 @@ void HoleFill::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int w glDisable(GL_BLEND); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LESS); // Only update the values > 0.999f (ie., only invalid pixels). - glBindVertexArray(fill_vao); fbos.render_to(depth_tex, flow_tex); // NOTE: Reading and writing to the same texture. @@ -1518,7 +1385,6 @@ private: GLuint blend_vs_obj; GLuint blend_fs_obj; GLuint blend_program; - GLuint blend_vao; GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex; GLuint uniform_z, uniform_sample_offset; @@ -1530,15 +1396,6 @@ HoleBlend::HoleBlend() blend_fs_obj = compile_shader(read_file("hole_blend.frag"), GL_FRAGMENT_SHADER); blend_program = link_program(blend_vs_obj, blend_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &blend_vao); - glBindVertexArray(blend_vao); - glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); - - GLint position_attrib = glGetAttribLocation(blend_program, "position"); - glEnableVertexArrayAttrib(blend_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_left_tex = glGetUniformLocation(blend_program, "left_tex"); uniform_right_tex = glGetUniformLocation(blend_program, "right_tex"); uniform_up_tex = glGetUniformLocation(blend_program, "up_tex"); @@ -1563,7 +1420,6 @@ void HoleBlend::exec(GLuint flow_tex, GLuint depth_tex, GLuint temp_tex[3], int glDisable(GL_BLEND); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LEQUAL); // Skip over all of the pixels that were never holes to begin with. - glBindVertexArray(blend_vao); fbos.render_to(depth_tex, flow_tex); // NOTE: Reading and writing to the same texture. @@ -1582,7 +1438,6 @@ private: GLuint blend_vs_obj; GLuint blend_fs_obj; GLuint blend_program; - GLuint blend_vao; GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex; GLuint uniform_alpha, uniform_flow_consistency_tolerance; @@ -1594,14 +1449,6 @@ Blend::Blend() blend_fs_obj = compile_shader(read_file("blend.frag"), GL_FRAGMENT_SHADER); blend_program = link_program(blend_vs_obj, blend_fs_obj); - // Set up the VAO containing all the required position/texcoord data. - glCreateVertexArrays(1, &blend_vao); - glBindVertexArray(blend_vao); - - GLint position_attrib = glGetAttribLocation(blend_program, "position"); - glEnableVertexArrayAttrib(blend_vao, position_attrib); - glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - uniform_image0_tex = glGetUniformLocation(blend_program, "image0_tex"); uniform_image1_tex = glGetUniformLocation(blend_program, "image1_tex"); uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex"); @@ -1616,12 +1463,9 @@ void Blend::exec(GLuint tex0, GLuint tex1, GLuint flow_tex, GLuint output_tex, i bind_sampler(blend_program, uniform_image1_tex, 1, tex1, linear_sampler); bind_sampler(blend_program, uniform_flow_tex, 2, flow_tex, linear_sampler); // May be upsampled. glProgramUniform1f(blend_program, uniform_alpha, alpha); - //glProgramUniform1f(blend_program, uniform_flow_consistency_tolerance, 1.0f / glViewport(0, 0, level_width, level_height); fbos.render_to(output_tex); - glBindVertexArray(blend_vao); - glUseProgram(blend_program); glDisable(GL_BLEND); // A bit ironic, perhaps. glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -1641,7 +1485,9 @@ public: private: int width, height, flow_level; + GLuint vertex_vbo, vao; TexturePool pool; + Splat splat; HoleFill hole_fill; HoleBlend hole_blend; @@ -1649,7 +1495,25 @@ private: }; Interpolate::Interpolate(int width, int height, int flow_level) - : width(width), height(height), flow_level(flow_level) {} + : width(width), height(height), flow_level(flow_level) { + // Set up the vertex data that will be shared between all passes. + float vertices[] = { + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 1.0f, + 1.0f, 0.0f, + }; + glCreateBuffers(1, &vertex_vbo); + glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW); + + glCreateVertexArrays(1, &vao); + glBindVertexArray(vao); + glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); + + GLint position_attrib = 0; // Hard-coded in every vertex shader. + glEnableVertexArrayAttrib(vao, position_attrib); + glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); +} GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLuint backward_flow_tex, GLuint width, GLuint height, float alpha) { @@ -1657,6 +1521,8 @@ GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLui ScopedTimer total_timer("Total", &timers); + glBindVertexArray(vao); + // Pick out the right level to test splatting results on. GLuint tex0_view, tex1_view; glGenTextures(1, &tex0_view); @@ -1698,8 +1564,11 @@ GLuint Interpolate::exec(GLuint tex0, GLuint tex1, GLuint forward_flow_tex, GLui ScopedTimer timer("Blend", &total_timer); blend.exec(tex0, tex1, flow_tex, output_tex, width, height, alpha); } + pool.release_texture(flow_tex); total_timer.end(); - timers.print(); + if (!in_warmup) { + timers.print(); + } return output_tex; } @@ -1904,6 +1773,16 @@ void compute_flow_only(int argc, char **argv, int optind) glGenerateTextureMipmap(tex1_gray); DISComputeFlow compute_flow(width1, height1); + + if (enable_warmup) { + in_warmup = true; + for (int i = 0; i < 10; ++i) { + GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE); + compute_flow.release_texture(final_tex); + } + in_warmup = false; + } + GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE); schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm"); @@ -1998,6 +1877,19 @@ void interpolate_image(int argc, char **argv, int optind) gray.exec(tex1, tex1_gray, width1, height1); glGenerateTextureMipmap(tex1_gray); + if (enable_warmup) { + in_warmup = true; + for (int i = 0; i < 10; ++i) { + GLuint forward_flow_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); + GLuint backward_flow_tex = compute_flow.exec(tex1_gray, tex0_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); + GLuint interpolated_tex = interpolate.exec(tex0, tex1, forward_flow_tex, backward_flow_tex, width1, height1, 0.5f); + compute_flow.release_texture(forward_flow_tex); + compute_flow.release_texture(backward_flow_tex); + interpolate.release_texture(interpolated_tex); + } + in_warmup = false; + } + GLuint forward_flow_tex = compute_flow.exec(tex0_gray, tex1_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); GLuint backward_flow_tex = compute_flow.exec(tex1_gray, tex0_gray, DISComputeFlow::DO_NOT_RESIZE_FLOW); @@ -2026,7 +1918,8 @@ int main(int argc, char **argv) { "disable-timing", no_argument, 0, 1000 }, { "detailed-timing", no_argument, 0, 1003 }, { "ignore-variational-refinement", no_argument, 0, 1001 }, // Still calculates it, just doesn't apply it. - { "interpolate", no_argument, 0, 1002 } + { "interpolate", no_argument, 0, 1002 }, + { "warmup", no_argument, 0, 1004 } }; for ( ;; ) { @@ -2058,6 +1951,9 @@ int main(int argc, char **argv) case 1003: detailed_timing = true; break; + case 1004: + enable_warmup = true; + break; default: fprintf(stderr, "Unknown option '%s'\n", argv[option_index]); exit(1);