+// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of
+// I_0 and I_w. The prewarping is what enables us to solve the variational
+// flow for du,dv instead of u,v.
+//
+// Also calculates the normalized flow, ie. divides by z (this is needed because
+// Densify works by additive blending) and multiplies by the image size.
+//
+// See variational_refinement.txt for more information.
+class Prewarp {
+public:
+	// Compiles and links the prewarp program and sets up its VAO.
+	Prewarp();
+
+	// Runs one full-screen prewarp pass.
+	//
+	// Inputs (sampled): tex0_view, tex1_view, flow_tex.
+	// Outputs (color attachments 0, 1, 2): I_tex, I_t_tex, normalized_flow_tex.
+	// level_width/level_height give the viewport size and the image_size uniform.
+	//
+	// The parameter order here is kept identical to the out-of-line definition
+	// of Prewarp::exec; all the texture arguments are GLuint, so a mismatch
+	// between the two would compile silently and mislead callers.
+	void exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint prewarp_vs_obj;
+	GLuint prewarp_fs_obj;
+	GLuint prewarp_program;
+	GLuint prewarp_vao;
+
+	// Uniform locations, looked up once in the constructor.
+	GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
+	GLuint uniform_image_size;
+};
+
+Prewarp::Prewarp()
+{
+	// Build the program from vs.vert and prewarp.frag.
+	prewarp_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	prewarp_fs_obj = compile_shader(read_file("prewarp.frag"), GL_FRAGMENT_SHADER);
+	prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj);
+
+	// Cache the uniform locations that exec() needs.
+	uniform_image0_tex = glGetUniformLocation(prewarp_program, "image0_tex");
+	uniform_image1_tex = glGetUniformLocation(prewarp_program, "image1_tex");
+	uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
+	uniform_image_size = glGetUniformLocation(prewarp_program, "image_size");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &prewarp_vao);
+	glBindVertexArray(prewarp_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(prewarp_program, "position");
+	glEnableVertexArrayAttrib(prewarp_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the prewarp shader as one full-screen pass, sampling tex0_view,
+// tex1_view and flow_tex, and writing I_tex, I_t_tex and normalized_flow_tex
+// (color attachments 0, 1 and 2, respectively).
+void Prewarp::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height)
+{
+	glUseProgram(prewarp_program);
+
+	bind_sampler(prewarp_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
+	bind_sampler(prewarp_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
+	bind_sampler(prewarp_program, uniform_flow_tex, 2, flow_tex, nearest_sampler);
+
+	glProgramUniform2f(prewarp_program, uniform_image_size, level_width, level_height);
+
+	// Temporary FBO with the three outputs attached; deleted at the end of
+	// this function so that we don't leak one FBO per call.
+	GLuint prewarp_fbo;
+	glCreateFramebuffers(1, &prewarp_fbo);
+	GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
+	glNamedFramebufferDrawBuffers(prewarp_fbo, 3, bufs);
+	glNamedFramebufferTexture(prewarp_fbo, GL_COLOR_ATTACHMENT0, I_tex, 0);
+	glNamedFramebufferTexture(prewarp_fbo, GL_COLOR_ATTACHMENT1, I_t_tex, 0);
+	glNamedFramebufferTexture(prewarp_fbo, GL_COLOR_ATTACHMENT2, normalized_flow_tex, 0);
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(prewarp_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, prewarp_fbo);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// The draw has been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached textures are untouched.
+	glDeleteFramebuffers(1, &prewarp_fbo);
+}
+
+// From I, calculate the partial derivatives I_x and I_y. We use a four-tap
+// central difference filter, since apparently, that's tradition (I haven't
+// measured quality versus a more normal 0.5 (I[x+1] - I[x-1]).)
+// The coefficients come from
+//
+// https://en.wikipedia.org/wiki/Finite_difference_coefficient
+//
+// Also computes β_0, since it depends only on I_x and I_y.
+class Derivatives {
+public:
+	// Compiles and links the derivatives program and sets up its VAO.
+	Derivatives();
+
+	// Runs one full-screen pass sampling input_tex and writing I_x_y_tex
+	// (color attachment 0) and beta_0_tex (color attachment 1).
+	void exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint derivatives_vs_obj;
+	GLuint derivatives_fs_obj;
+	GLuint derivatives_program;
+	GLuint derivatives_vao;
+
+	// Location of the "tex" sampler uniform.
+	GLuint uniform_tex;
+};
+
+Derivatives::Derivatives()
+{
+	// Build the program from vs.vert and derivatives.frag.
+	derivatives_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	derivatives_fs_obj = compile_shader(read_file("derivatives.frag"), GL_FRAGMENT_SHADER);
+	derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj);
+
+	// Cache the uniform location that exec() needs.
+	uniform_tex = glGetUniformLocation(derivatives_program, "tex");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &derivatives_vao);
+	glBindVertexArray(derivatives_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(derivatives_program, "position");
+	glEnableVertexArrayAttrib(derivatives_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the derivatives shader as one full-screen pass, sampling input_tex
+// and writing I_x_y_tex (color attachment 0) and beta_0_tex (color attachment 1).
+void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height)
+{
+	glUseProgram(derivatives_program);
+
+	bind_sampler(derivatives_program, uniform_tex, 0, input_tex, nearest_sampler);
+
+	// Temporary FBO with the two outputs attached; deleted at the end of
+	// this function so that we don't leak one FBO per call.
+	GLuint derivatives_fbo;
+	glCreateFramebuffers(1, &derivatives_fbo);
+	GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+	glNamedFramebufferDrawBuffers(derivatives_fbo, 2, bufs);
+	glNamedFramebufferTexture(derivatives_fbo, GL_COLOR_ATTACHMENT0, I_x_y_tex, 0);
+	glNamedFramebufferTexture(derivatives_fbo, GL_COLOR_ATTACHMENT1, beta_0_tex, 0);
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(derivatives_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, derivatives_fbo);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// The draw has been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached textures are untouched.
+	glDeleteFramebuffers(1, &derivatives_fbo);
+}
+
+// Calculate the smoothness constraints between neighboring pixels;
+// s_x(x,y) stores smoothness between pixel (x,y) and (x+1,y),
+// and s_y(x,y) stores between (x,y) and (x,y+1). We'll sample with
+// border color (0,0) later, so that there's zero diffusion out of
+// the border.
+//
+// See variational_refinement.txt for more information.
+class ComputeSmoothness {
+public:
+	// Compiles and links the smoothness program and sets up its VAO.
+	ComputeSmoothness();
+
+	// Runs one full-screen pass sampling flow_tex and diff_flow_tex, writing
+	// smoothness_x_tex (color attachment 0) and smoothness_y_tex (color
+	// attachment 1), and then clears the last column of smoothness_x_tex
+	// and the last row of smoothness_y_tex.
+	void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint smoothness_vs_obj;
+	GLuint smoothness_fs_obj;
+	GLuint smoothness_program;
+	GLuint smoothness_vao;
+
+	// Sampler uniform locations, looked up once in the constructor.
+	GLuint uniform_flow_tex, uniform_diff_flow_tex;
+};
+
+ComputeSmoothness::ComputeSmoothness()
+{
+	// Build the program from vs.vert and smoothness.frag.
+	smoothness_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	smoothness_fs_obj = compile_shader(read_file("smoothness.frag"), GL_FRAGMENT_SHADER);
+	smoothness_program = link_program(smoothness_vs_obj, smoothness_fs_obj);
+
+	// Cache the uniform locations that exec() needs.
+	uniform_flow_tex = glGetUniformLocation(smoothness_program, "flow_tex");
+	uniform_diff_flow_tex = glGetUniformLocation(smoothness_program, "diff_flow_tex");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &smoothness_vao);
+	glBindVertexArray(smoothness_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(smoothness_program, "position");
+	glEnableVertexArrayAttrib(smoothness_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the smoothness shader as one full-screen pass, sampling flow_tex and
+// diff_flow_tex and writing smoothness_x_tex (color attachment 0) and
+// smoothness_y_tex (color attachment 1). Afterwards, the last column of
+// smoothness_x_tex and the last row of smoothness_y_tex are cleared.
+void ComputeSmoothness::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height)
+{
+	glUseProgram(smoothness_program);
+
+	bind_sampler(smoothness_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+	bind_sampler(smoothness_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler);
+
+	// Temporary FBO with the two outputs attached; deleted at the end of
+	// this function so that we don't leak one FBO per call.
+	GLuint smoothness_fbo;
+	glCreateFramebuffers(1, &smoothness_fbo);
+	GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+	glNamedFramebufferDrawBuffers(smoothness_fbo, 2, bufs);
+	glNamedFramebufferTexture(smoothness_fbo, GL_COLOR_ATTACHMENT0, smoothness_x_tex, 0);
+	glNamedFramebufferTexture(smoothness_fbo, GL_COLOR_ATTACHMENT1, smoothness_y_tex, 0);
+
+	glViewport(0, 0, level_width, level_height);
+
+	glDisable(GL_BLEND);
+	glBindVertexArray(smoothness_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, smoothness_fbo);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// Make sure the smoothness on the right and upper borders is zero.
+	// We could have done this by making (W-1)xH and Wx(H-1) textures instead
+	// (we're sampling smoothness with all-zero border color), but we'd
+	// have to adjust the sampling coordinates, which is annoying.
+	// (Passing nullptr as the data makes glClearTexSubImage fill with zeros.)
+	glClearTexSubImage(smoothness_x_tex, 0, level_width - 1, 0, 0, 1, level_height, 1, GL_RED, GL_FLOAT, nullptr);
+	glClearTexSubImage(smoothness_y_tex, 0, 0, level_height - 1, 0, level_width, 1, 1, GL_RED, GL_FLOAT, nullptr);
+
+	// The FBO is no longer needed. Deleting a bound FBO also rebinds
+	// framebuffer 0; the attached textures are untouched.
+	glDeleteFramebuffers(1, &smoothness_fbo);
+}
+
+// Set up the equations set (two equations in two unknowns, per pixel).
+// We store five floats; the three non-redundant elements of the 2x2 matrix (A)
+// as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit
+// floats. (Actually, we store the inverse of the diagonal elements, because
+// we only ever need to divide by them.) This fits into four u32 values;
+// R, G, B for the matrix (the last element is symmetric) and A for the two b values.
+// All the values of the energy term (E_I, E_G, E_S), except the smoothness
+// terms that depend on other pixels, are calculated in one pass.
+//
+// See variational_refinement.txt for more information.
+class SetupEquations {
+public:
+	// Compiles and links the equations program and sets up its VAO.
+	SetupEquations();
+
+	// Runs one full-screen pass sampling the seven input textures and
+	// writing equation_tex (color attachment 0).
+	//
+	// The fourth parameter is named base_flow_tex to agree with the
+	// out-of-line definition and with the "base_flow_tex" shader uniform
+	// it is bound to.
+	void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, GLuint equation_tex, int level_width, int level_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint equations_vs_obj;
+	GLuint equations_fs_obj;
+	GLuint equations_program;
+	GLuint equations_vao;
+
+	// Sampler uniform locations, looked up once in the constructor.
+	GLuint uniform_I_x_y_tex, uniform_I_t_tex;
+	GLuint uniform_diff_flow_tex, uniform_base_flow_tex;
+	GLuint uniform_beta_0_tex;
+	GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+};
+
+SetupEquations::SetupEquations()
+{
+	// Build the program from vs.vert and equations.frag.
+	equations_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	equations_fs_obj = compile_shader(read_file("equations.frag"), GL_FRAGMENT_SHADER);
+	equations_program = link_program(equations_vs_obj, equations_fs_obj);
+
+	// Cache the uniform locations that exec() needs.
+	uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex");
+	uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex");
+	uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex");
+	uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex");
+	uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex");
+	uniform_smoothness_x_tex = glGetUniformLocation(equations_program, "smoothness_x_tex");
+	uniform_smoothness_y_tex = glGetUniformLocation(equations_program, "smoothness_y_tex");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &equations_vao);
+	glBindVertexArray(equations_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(equations_program, "position");
+	glEnableVertexArrayAttrib(equations_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the equations shader as one full-screen pass, sampling the seven
+// input textures (units 0-6) and writing equation_tex (color attachment 0).
+void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, GLuint equation_tex, int level_width, int level_height)
+{
+	glUseProgram(equations_program);
+
+	bind_sampler(equations_program, uniform_I_x_y_tex, 0, I_x_y_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_I_t_tex, 1, I_t_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
+	// The smoothness textures use their own sampler object.
+	bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, smoothness_sampler);
+	bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, smoothness_sampler);
+
+	// Temporary FBO with the output attached; deleted at the end of this
+	// function so that we don't leak one FBO per call.
+	GLuint equations_fbo;
+	glCreateFramebuffers(1, &equations_fbo);
+	glNamedFramebufferTexture(equations_fbo, GL_COLOR_ATTACHMENT0, equation_tex, 0);
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(equations_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, equations_fbo);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// The draw has been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached texture is untouched.
+	glDeleteFramebuffers(1, &equations_fbo);
+}
+
+// Actually solve the equation sets made by SetupEquations, by means of
+// successive over-relaxation (SOR). Each iteration is one full-screen pass
+// that samples diff_flow_tex, the equation texture and the two smoothness
+// textures, and writes the result back into diff_flow_tex.
+//
+// See variational_refinement.txt for more information.
+class SOR {
+public:
+	// Compiles and links the SOR program and sets up its VAO.
+	SOR();
+
+	// Runs num_iterations full-screen passes, each sampling diff_flow_tex,
+	// equation_tex and the two smoothness textures, and rendering back into
+	// diff_flow_tex.
+	void exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint sor_vs_obj;
+	GLuint sor_fs_obj;
+	GLuint sor_program;
+	GLuint sor_vao;
+
+	// Sampler uniform locations, looked up once in the constructor.
+	GLuint uniform_diff_flow_tex;
+	GLuint uniform_equation_tex;
+	GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+};
+
+SOR::SOR()
+{
+	// Build the program from vs.vert and sor.frag.
+	sor_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER);
+	sor_program = link_program(sor_vs_obj, sor_fs_obj);
+
+	// Cache the uniform locations that exec() needs.
+	uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
+	uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex");
+	uniform_smoothness_x_tex = glGetUniformLocation(sor_program, "smoothness_x_tex");
+	uniform_smoothness_y_tex = glGetUniformLocation(sor_program, "smoothness_y_tex");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &sor_vao);
+	glBindVertexArray(sor_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(sor_program, "position");
+	glEnableVertexArrayAttrib(sor_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs num_iterations full-screen passes of the SOR shader. Every pass
+// samples diff_flow_tex, equation_tex and the two smoothness textures, and
+// renders into diff_flow_tex itself, i.e. the texture is updated in place.
+void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
+{
+	glUseProgram(sor_program);
+
+	bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+	bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
+	bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+	bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
+
+	// Temporary FBO; deleted at the end of this function so that we don't
+	// leak one FBO per call.
+	GLuint sor_fbo;
+	glCreateFramebuffers(1, &sor_fbo);
+	glNamedFramebufferTexture(sor_fbo, GL_COLOR_ATTACHMENT0, diff_flow_tex, 0);  // NOTE: Bind to same as we render from!
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(sor_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, sor_fbo);
+
+	for (int i = 0; i < num_iterations; ++i) {
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		// Since we sample from the texture we render to, a texture barrier
+		// is needed between consecutive passes (but not after the last one).
+		if (i != num_iterations - 1) {
+			glTextureBarrier();
+		}
+	}
+
+	// All draws have been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached texture is untouched.
+	glDeleteFramebuffers(1, &sor_fbo);
+}
+
+// Simply add the differential flow found by the variational refinement to the base flow.
+// The output is in base_flow_tex; we don't need to make a new texture.
+class AddBaseFlow {
+public:
+	// Compiles and links the add-base-flow program and sets up its VAO.
+	AddBaseFlow();
+
+	// Runs one full-screen pass that samples diff_flow_tex and additively
+	// blends the shader output into base_flow_tex (color attachment 0).
+	void exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint add_flow_vs_obj;
+	GLuint add_flow_fs_obj;
+	GLuint add_flow_program;
+	GLuint add_flow_vao;
+
+	// Location of the "diff_flow_tex" sampler uniform.
+	GLuint uniform_diff_flow_tex;
+};
+
+AddBaseFlow::AddBaseFlow()
+{
+	// Build the program from vs.vert and add_base_flow.frag.
+	add_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag"), GL_FRAGMENT_SHADER);
+	add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
+
+	// Cache the uniform location that exec() needs.
+	uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &add_flow_vao);
+	glBindVertexArray(add_flow_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(add_flow_program, "position");
+	glEnableVertexArrayAttrib(add_flow_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the add-base-flow shader as one full-screen pass, sampling
+// diff_flow_tex and additively blending the result into base_flow_tex.
+//
+// NOTE: GL_BLEND is left enabled (with GL_ONE, GL_ONE) when this returns.
+void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height)
+{
+	glUseProgram(add_flow_program);
+
+	bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+
+	// Temporary FBO with the output attached; deleted at the end of this
+	// function so that we don't leak one FBO per call.
+	GLuint add_flow_fbo;
+	glCreateFramebuffers(1, &add_flow_fbo);
+	glNamedFramebufferTexture(add_flow_fbo, GL_COLOR_ATTACHMENT0, base_flow_tex, 0);
+
+	glViewport(0, 0, level_width, level_height);
+	// ONE/ONE blending adds the shader output to what is already in base_flow_tex.
+	glEnable(GL_BLEND);
+	glBlendFunc(GL_ONE, GL_ONE);
+	glBindVertexArray(add_flow_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, add_flow_fbo);
+
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// The draw has been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached texture is untouched.
+	glDeleteFramebuffers(1, &add_flow_fbo);
+}
+
+// Collects GL timestamp query pairs and prints the elapsed time for each.
+class GPUTimers {
+public:
+	// Creates a query pair, records a timestamp into the first query and
+	// remembers the pair under the given name and nesting level. The caller
+	// is expected to record the end timestamp into the second query.
+	pair<GLuint, GLuint> begin_timer(const string &name, int level);
+
+	// Prints one line per registered timer to stderr.
+	void print();
+
+private:
+	struct Timer {
+		string name;                 // Label printed for this timer.
+		int level;                   // Nesting level; controls the indentation in print().
+		pair<GLuint, GLuint> query;  // (start, end) timestamp query objects.
+	};
+
+	// All timers, in the order they were started.
+	vector<Timer> timers;
+};
+
+// Generates two query objects, records a GL_TIMESTAMP into the first one,
+// stores the pair together with name and level, and returns the pair.
+pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
+{
+	GLuint ids[2];
+	glGenQueries(2, ids);
+	glQueryCounter(ids[0], GL_TIMESTAMP);
+
+	// ids[1] is left untouched here; the end timestamp goes into it later.
+	const pair<GLuint, GLuint> query_pair(ids[0], ids[1]);
+	timers.push_back(Timer{ name, level, query_pair });
+	return query_pair;
+}
+
+// Take a copy of the flow, bilinearly interpolated and scaled up.
+class ResizeFlow {
+public:
+	// Compiles and links the resize-flow program and sets up its VAO.
+	ResizeFlow();
+
+	// Runs one full-screen pass at the output size, sampling in_tex and
+	// writing out_tex (color attachment 0). The scale_factor uniform is set
+	// to (output_width / input_width, output_height / input_height).
+	void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height);
+
+private:
+	// Shader objects, the linked program, and the VAO used for the draw.
+	GLuint resize_flow_vs_obj;
+	GLuint resize_flow_fs_obj;
+	GLuint resize_flow_program;
+	GLuint resize_flow_vao;
+
+	// Uniform locations, looked up once in the constructor.
+	GLuint uniform_flow_tex;
+	GLuint uniform_scale_factor;
+};
+
+ResizeFlow::ResizeFlow()
+{
+	// Build the program from vs.vert and resize_flow.frag.
+	resize_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag"), GL_FRAGMENT_SHADER);
+	resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
+
+	// Cache the uniform locations that exec() needs.
+	uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
+	uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
+
+	// Create the VAO and point the "position" attribute at vertex_vbo
+	// (two floats per vertex, tightly packed, starting at offset 0).
+	glCreateVertexArrays(1, &resize_flow_vao);
+	glBindVertexArray(resize_flow_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	const GLint pos_loc = glGetAttribLocation(resize_flow_program, "position");
+	glEnableVertexArrayAttrib(resize_flow_vao, pos_loc);
+	glVertexAttribPointer(pos_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the resize-flow shader as one full-screen pass at the output size,
+// sampling flow_tex and writing out_tex (color attachment 0). The shader
+// receives the per-axis output/input size ratio as scale_factor.
+void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height)
+{
+	glUseProgram(resize_flow_program);
+
+	bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+
+	// Cast before dividing so that the ratios are not truncated to integers.
+	glProgramUniform2f(resize_flow_program, uniform_scale_factor, float(output_width) / input_width, float(output_height) / input_height);
+
+	// Temporary FBO with the output attached; deleted at the end of this
+	// function so that we don't leak one FBO per call.
+	GLuint resize_flow_fbo;
+	glCreateFramebuffers(1, &resize_flow_fbo);
+	glNamedFramebufferTexture(resize_flow_fbo, GL_COLOR_ATTACHMENT0, out_tex, 0);
+
+	glViewport(0, 0, output_width, output_height);
+	glDisable(GL_BLEND);
+	glBindVertexArray(resize_flow_vao);
+	glBindFramebuffer(GL_FRAMEBUFFER, resize_flow_fbo);
+
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	// The draw has been issued, so the FBO is no longer needed. Deleting a
+	// bound FBO also rebinds framebuffer 0; the attached texture is untouched.
+	glDeleteFramebuffers(1, &resize_flow_fbo);
+}
+
+// Prints every registered timer to stderr, indented by nesting level,
+// with the difference between its two timestamps shown in milliseconds.
+void GPUTimers::print()
+{
+	for (const Timer &t : timers) {
+		// NOTE: Fetching GL_QUERY_RESULT makes the CPU wait for the GPU.
+		GLuint64 begin_stamp, end_stamp;
+		glGetQueryObjectui64v(t.query.first, GL_QUERY_RESULT, &begin_stamp);
+		glGetQueryObjectui64v(t.query.second, GL_QUERY_RESULT, &end_stamp);
+
+		// Two indentation steps per nesting level.
+		for (int pad = t.level * 2; pad > 0; --pad) {
+			fprintf(stderr, " ");
+		}
+
+		// Convert to signed before scaling, so that an end stamp smaller
+		// than the begin stamp shows up as a negative number.
+		const GLint64 elapsed = GLint64(end_stamp - begin_stamp);
+		fprintf(stderr, "%-30s %4.1f ms\n", t.name.c_str(), elapsed / 1e6);
+	}
+}
+
+// A simple RAII class for timing until the end of the scope.
+class ScopedTimer {
+public:
+	// Starts a top-level (level 0) timer registered in the given GPUTimers.
+	ScopedTimer(const string &name, GPUTimers *timers)
+		: timers(timers),
+		  level(0),
+		  query(timers->begin_timer(name, level))
+	{
+	}
+
+	// Starts a timer nested one level below parent_timer, registered in
+	// the same GPUTimers as the parent.
+	ScopedTimer(const string &name, ScopedTimer *parent_timer)
+		: timers(parent_timer->timers),
+		  level(parent_timer->level + 1),
+		  query(timers->begin_timer(name, level))
+	{
+	}
+
+	// Ends the timer if end() has not already been called.
+	~ScopedTimer()
+	{
+		end();
+	}
+
+	// Records the end timestamp. Only the first call has any effect.
+	void end()
+	{
+		if (ended) {
+			return;
+		}
+		glQueryCounter(query.second, GL_TIMESTAMP);
+		ended = true;
+	}
+
+private:
+	// NOTE: query is initialized from timers and level in the constructors,
+	// so it must stay declared after them.
+	GPUTimers *timers;
+	int level;
+	pair<GLuint, GLuint> query;
+	bool ended = false;
+};
+
+int main(int argc, char **argv)