#include "util.h"
#include <algorithm>
+#include <deque>
#include <memory>
#include <map>
+#include <stack>
#include <vector>
#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
// Some global OpenGL objects.
// TODO: These should really be part of DISComputeFlow.
-GLuint nearest_sampler, linear_sampler, smoothness_sampler;
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
GLuint vertex_vbo;
+// Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
+struct ReadInProgress {  // One texture readback that has been scheduled but whose results have not been written out yet.
+ GLuint pbo;  // Buffer the texture is being read into; handed back to spare_pbos once the read is finished.
+ string filename0, filename1;  // Names of the two input images; used for the progress message printed when the flow is written.
+ string flow_filename, ppm_filename; // Either may be empty for no write.
+};
+stack<GLuint> spare_pbos;  // PBOs that are currently free to receive a new readback.
+deque<ReadInProgress> reads_in_progress;  // Scheduled readbacks, oldest first: appended at the back, finished from the front.
+
string read_file(const string &filename)
{
FILE *fp = fopen(filename.c_str(), "r");
GLuint motion_search_program;
GLuint motion_search_vao;
- GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_prev_level_size;
+ GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_flow_size, uniform_inv_prev_level_size;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
};
uniform_image_size = glGetUniformLocation(motion_search_program, "image_size");
uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
+ uniform_inv_flow_size = glGetUniformLocation(motion_search_program, "inv_flow_size");
uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex");
bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
- bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, nearest_sampler);
+ bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
+ glProgramUniform2f(motion_search_program, uniform_inv_flow_size, 1.0f / width_patches, 1.0f / height_patches);
glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
glViewport(0, 0, width_patches, height_patches);
bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
- bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, smoothness_sampler);
- bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, zero_border_sampler);
glProgramUniform1f(equations_program, uniform_delta, vr_delta);
glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
glUseProgram(sor_program);
bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
- bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
- bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
glViewport(0, 0, level_width, level_height);
// The smoothness is sampled so that once we get to a smoothness involving
// a value outside the border, the diffusivity between the two becomes zero.
- glCreateSamplers(1, &smoothness_sampler);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+ // Similarly, gradients are zero outside the border, since the edge is taken
+ // to be constant.
+ glCreateSamplers(1, &zero_border_sampler);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+ glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
// Initial flow is zero, 1x1.
glCreateTextures(GL_TEXTURE_2D, 1, &initial_flow_tex);
fclose(fp);
}
+// Completes the oldest outstanding readback: maps its PBO (which blocks until
+// the transfer is done), copies the flow field into host memory, returns the
+// PBO to spare_pbos, and then writes whichever of the .flo / .ppm outputs
+// were requested for that read.
+//
+// width, height: dimensions of the flow field; the PBO is read as
+// width * height texels of two floats each.
+//
+// Precondition: reads_in_progress must not be empty.
+// Exits the process with status 1 if the PBO cannot be mapped.
+void finish_one_read(GLuint width, GLuint height)
+{
+	assert(!reads_in_progress.empty());
+	ReadInProgress read = reads_in_progress.front();
+	reads_in_progress.pop_front();
+
+	// Do the size arithmetic in size_t, so that it cannot wrap around in
+	// 32-bit GLuint before being widened.
+	const size_t num_floats = size_t(width) * height * 2;
+	const size_t num_bytes = num_floats * sizeof(float);
+
+	unique_ptr<float[]> flow(new float[num_floats]);
+	void *buf = glMapNamedBufferRange(read.pbo, 0, num_bytes, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
+	if (buf == nullptr) {
+		// Mapping returns NULL on error; copying from it would be undefined
+		// behavior, and there is no flow data to write, so give up loudly.
+		fprintf(stderr, "Could not map PBO %u for readback (GL error 0x%x)\n", read.pbo, glGetError());
+		exit(1);
+	}
+	memcpy(flow.get(), buf, num_bytes);
+	glUnmapNamedBuffer(read.pbo);
+	// The data now lives in host memory, so the PBO can be reused right away.
+	spare_pbos.push(read.pbo);
+
+	flip_coordinate_system(flow.get(), width, height);
+	if (!read.flow_filename.empty()) {
+		write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+		fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+	}
+	if (!read.ppm_filename.empty()) {
+		write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+	}
+}
+
+// Queues a readback of <tex> into a free PBO, together with the filenames
+// needed to write the result out later in finish_one_read(). If no PBO is
+// free, the oldest pending read is finished first so that its PBO can be
+// reused. flow_filename and ppm_filename may be empty strings to skip that
+// particular output.
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+	// Out of buffers; drain the oldest read to get one back.
+	if (spare_pbos.empty()) {
+		finish_one_read(width, height);
+	}
+	assert(!spare_pbos.empty());
+
+	const GLuint pbo = spare_pbos.top();
+	reads_in_progress.push_back(ReadInProgress{ pbo, filename0, filename1, flow_filename, ppm_filename });
+	spare_pbos.pop();
+
+	// With a pixel pack buffer bound, the destination pointer is taken as an
+	// offset into that buffer, so nullptr means "start of the PBO".
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
+	glGetTextureImage(tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), nullptr);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
int main(int argc, char **argv)
{
static const option long_options[] = {
const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
- fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
// Load pictures.
unsigned width1, height1, width2, height2;
exit(1);
}
+ // Set up some PBOs to do asynchronous readback.
+ GLuint pbos[5];
+ glCreateBuffers(5, pbos);
+ for (int i = 0; i < 5; ++i) {
+ glNamedBufferData(pbos[i], width1 * height1 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+ spare_pbos.push(pbos[i]);
+ }
+
// FIXME: Should be part of DISComputeFlow (but needs to be initialized
// before all the render passes).
float vertices[] = {
DISComputeFlow compute_flow(width1, height1);
GLuint final_tex = compute_flow.exec(tex0, tex1);
- unique_ptr<float[]> dense_flow(new float[width1 * height1 * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width1 * height1 * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
compute_flow.release_texture(final_tex);
- flip_coordinate_system(dense_flow.get(), width1, height1);
- write_flow(flow_filename, dense_flow.get(), width1, height1);
- write_ppm("flow.ppm", dense_flow.get(), width1, height1);
-
- dense_flow.reset();
-
// See if there are more flows on the command line (ie., more than three arguments),
// and if so, process them.
int num_flows = (argc - optind) / 3;
const char *filename0 = argv[optind + i * 3 + 0];
const char *filename1 = argv[optind + i * 3 + 1];
const char *flow_filename = argv[optind + i * 3 + 2];
- fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
-
GLuint width, height;
GLuint tex0 = load_texture(filename0, &width, &height);
if (width != width1 || height != height1) {
}
GLuint final_tex = compute_flow.exec(tex0, tex1);
-
- unique_ptr<float[]> dense_flow(new float[width * height * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
compute_flow.release_texture(final_tex);
-
- flip_coordinate_system(dense_flow.get(), width, height);
- write_flow(flow_filename, dense_flow.get(), width, height);
}
- fprintf(stderr, "err = %d\n", glGetError());
+ while (!reads_in_progress.empty()) {
+ finish_one_read(width1, height1);
+ }
}