+private:
+ PersistentFBOSet<1> fbos;
+
+ GLuint add_flow_vs_obj;
+ GLuint add_flow_fs_obj;
+ GLuint add_flow_program;
+ GLuint add_flow_vao;
+
+ GLuint uniform_diff_flow_tex;
+};
+
+// Builds the GL program from vs.vert and add_base_flow.frag, looks up the
+// attribute/uniform locations it needs, and creates the VAO that sources the
+// "position" attribute from vertex_vbo.
+AddBaseFlow::AddBaseFlow()
+{
+	// Compile both stages, then link them into the program used by exec().
+	add_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag"), GL_FRAGMENT_SHADER);
+	add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
+
+	// Resolve everything we need from the linked program up front.
+	const GLint pos_location = glGetAttribLocation(add_flow_program, "position");
+	uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
+
+	// Set up the VAO: two tightly packed floats per vertex, read from the
+	// start of vertex_vbo. The VAO and the array buffer are left bound.
+	glCreateVertexArrays(1, &add_flow_vao);
+	glBindVertexArray(add_flow_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+	glEnableVertexArrayAttrib(add_flow_vao, pos_location);
+	glVertexAttribPointer(pos_location, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Draws a four-vertex triangle strip with the add-flow program, sampling
+// diff_flow_tex (texture unit 0, nearest_sampler) and rendering into
+// base_flow_tex with blending enabled.
+//
+// level_width/level_height give the viewport size for the draw.
+// Note that GL_BLEND is left enabled when this function returns.
+void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height)
+{
+	glUseProgram(add_flow_program);
+	bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+
+	// Source and destination both weighted by one, so (with the default
+	// blend equation) the shader output is added to the existing contents.
+	glEnable(GL_BLEND);
+	glBlendFunc(GL_ONE, GL_ONE);
+
+	// Geometry and viewport for this pyramid level.
+	glBindVertexArray(add_flow_vao);
+	glViewport(0, 0, level_width, level_height);
+
+	// Select the render target last, right before drawing.
+	fbos.render_to(base_flow_tex);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+// Take a copy of the flow, bilinearly interpolated and scaled up.
+//
+// NOTE(review): exec() binds the input texture with nearest_sampler, so the
+// bilinear interpolation is presumably done in resize_flow.frag -- confirm.
+class ResizeFlow {
+public:
+	// Compiles/links the resize program and sets up its VAO.
+	ResizeFlow();
+
+	// Renders in_tex into out_tex using an output-sized viewport; the
+	// output/input size ratio is passed to the shader as "scale_factor".
+	void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height);
+
+private:
+	// Used by exec() to select out_tex as the render target.
+	PersistentFBOSet<1> fbos;
+
+	// Shader objects, the program linked from them, and the VAO used to draw.
+	GLuint resize_flow_vs_obj, resize_flow_fs_obj;
+	GLuint resize_flow_program;
+	GLuint resize_flow_vao;
+
+	// Uniform locations queried from resize_flow_program.
+	GLuint uniform_flow_tex, uniform_scale_factor;
+};
+
+// Builds the GL program from vs.vert and resize_flow.frag, looks up the
+// attribute/uniform locations it needs, and creates the VAO that sources the
+// "position" attribute from vertex_vbo.
+ResizeFlow::ResizeFlow()
+{
+	// Compile both stages, then link them into the program used by exec().
+	resize_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag"), GL_FRAGMENT_SHADER);
+	resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
+
+	// Resolve everything we need from the linked program up front.
+	const GLint pos_location = glGetAttribLocation(resize_flow_program, "position");
+	uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
+	uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
+
+	// Set up the VAO: two tightly packed floats per vertex, read from the
+	// start of vertex_vbo. The VAO and the array buffer are left bound.
+	glCreateVertexArrays(1, &resize_flow_vao);
+	glBindVertexArray(resize_flow_vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+	glEnableVertexArrayAttrib(resize_flow_vao, pos_location);
+	glVertexAttribPointer(pos_location, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Draws a four-vertex triangle strip with the resize program, sampling
+// flow_tex (texture unit 0, nearest_sampler) and rendering into out_tex with
+// blending disabled.
+//
+// The viewport is output_width x output_height, and the shader receives the
+// per-axis output/input size ratio in its "scale_factor" uniform.
+void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height)
+{
+	glUseProgram(resize_flow_program);
+	bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+
+	// How much larger the output is than the input, along each axis.
+	const float scale_x = float(output_width) / input_width;
+	const float scale_y = float(output_height) / input_height;
+	glProgramUniform2f(resize_flow_program, uniform_scale_factor, scale_x, scale_y);
+
+	// Plain overwrite of the destination; no blending for this pass.
+	glDisable(GL_BLEND);
+	glBindVertexArray(resize_flow_vao);
+	glViewport(0, 0, output_width, output_height);
+
+	// Select the render target last, right before drawing.
+	fbos.render_to(out_tex);
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+// Keeps a list of named GPU timestamp query pairs and prints the elapsed
+// time for each of them.
+class GPUTimers {
+public:
+	// Registers a new timer called <name> at nesting depth <level> and
+	// records its start timestamp. Returns the (start, end) query objects;
+	// the caller is expected to issue the end timestamp on the second one.
+	// Returns (0, 0) and registers nothing when timing is disabled.
+	pair<GLuint, GLuint> begin_timer(const string &name, int level);
+
+	// Writes one line per registered timer to stderr, indented by level.
+	void print();
+
+private:
+	// One registered timer.
+	struct Timer {
+		string name;                 // Label shown by print().
+		int level;                   // Nesting depth; print() indents by two spaces per level.
+		pair<GLuint, GLuint> query;  // (start, end) timestamp query objects.
+	};
+
+	// All timers, in the order begin_timer() was called.
+	vector<Timer> timers;
+};
+
+// Creates a pair of query objects, records a GL_TIMESTAMP on the first one
+// right away, and stores the pair together with <name> and <level>.
+//
+// Returns the (start, end) query pair; the end timestamp is not issued
+// here. When enable_timing is false, nothing is created and (0, 0) is
+// returned.
+pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
+{
+	if (!enable_timing) {
+		return pair<GLuint, GLuint>(0, 0);
+	}
+
+	// Index 0 is the start query, index 1 the end query.
+	GLuint query_ids[2];
+	glGenQueries(2, query_ids);
+	glQueryCounter(query_ids[0], GL_TIMESTAMP);
+
+	timers.push_back(Timer{ name, level, make_pair(query_ids[0], query_ids[1]) });
+	return timers.back().query;
+}
+
+// Prints every registered timer to stderr as "<name> <elapsed> ms", indented
+// by two spaces per nesting level, in registration order.
+void GPUTimers::print()
+{
+	for (const Timer &entry : timers) {
+		// NOTE: Asking for GL_QUERY_RESULT makes the CPU wait for the GPU.
+		GLuint64 stamp_begin, stamp_end;
+		glGetQueryObjectui64v(entry.query.first, GL_QUERY_RESULT, &stamp_begin);
+		glGetQueryObjectui64v(entry.query.second, GL_QUERY_RESULT, &stamp_end);
+
+		// Two spaces of indentation per level.
+		for (int remaining = entry.level * 2; remaining > 0; --remaining) {
+			fprintf(stderr, " ");
+		}
+
+		// Go through a signed type before converting to milliseconds.
+		const double elapsed_ms = GLint64(stamp_end - stamp_begin) / 1e6;
+		fprintf(stderr, "%-30s %4.1f ms\n", entry.name.c_str(), elapsed_ms);
+	}
+}
+
+// A simple RAII class for timing until the end of the scope.
+//
+// Construction registers a timer with a GPUTimers instance (which records
+// the start timestamp); end(), or the destructor if end() was never called,
+// records the end timestamp. Timers can be nested by constructing a child
+// from a pointer to its parent, which places it one level deeper.
+class ScopedTimer {
+public:
+	// Starts a top-level (level 0) timer called <name>, registered in <timers>.
+	ScopedTimer(const string &name, GPUTimers *timers)
+		: timers(timers), level(0)
+	{
+		query = timers->begin_timer(name, level);
+	}
+
+	// Starts a timer called <name> nested one level below <parent_timer>,
+	// registered in the same GPUTimers instance as the parent.
+	ScopedTimer(const string &name, ScopedTimer *parent_timer)
+		: timers(parent_timer->timers),
+		  level(parent_timer->level + 1)
+	{
+		query = timers->begin_timer(name, level);
+	}
+
+	// Not copyable: a copy would carry its own "ended" flag and issue a
+	// second end timestamp on the same query object when it goes out of
+	// scope, overwriting the measurement.
+	ScopedTimer(const ScopedTimer &) = delete;
+	ScopedTimer &operator=(const ScopedTimer &) = delete;
+
+	// Ends the timer if it has not been ended explicitly.
+	~ScopedTimer()
+	{
+		end();
+	}
+
+	// Records the end timestamp. Only the first call has any effect, and
+	// nothing happens when timing is disabled.
+	void end()
+	{
+		if (enable_timing && !ended) {
+			glQueryCounter(query.second, GL_TIMESTAMP);
+			ended = true;
+		}
+	}
+
+private:
+	GPUTimers *timers;           // Where this timer is registered; not owned.
+	int level;                   // Nesting depth (0 = top level).
+	pair<GLuint, GLuint> query;  // (start, end) queries returned by begin_timer().
+	bool ended = false;          // Set once the end timestamp has been issued.
+};
+
+class DISComputeFlow {
+public:
+ DISComputeFlow(int width, int height);