#include "util.h"
#include <algorithm>
+#include <deque>
#include <memory>
#include <map>
+#include <stack>
#include <vector>
#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
// since we have different normalizations and ranges in some cases.
float vr_gamma = 10.0f, vr_delta = 5.0f, vr_alpha = 10.0f;
+bool enable_timing = true;
+bool enable_variational_refinement = true; // Just for debugging.
+
// Some global OpenGL objects.
// TODO: These should really be part of DISComputeFlow.
-GLuint nearest_sampler, linear_sampler, smoothness_sampler;
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
GLuint vertex_vbo;
+// Bookkeeping for one asynchronous readback that has been issued but whose
+// result has not been written out yet. All readbacks are assumed to have the
+// same size (and to come from GL_RG16F textures).
+struct ReadInProgress {
+	GLuint pbo;  // Buffer object the texture is read back into.
+	string filename0;  // The two input images; only used for the log line.
+	string filename1;
+	string flow_filename;  // Empty means no flow file is written.
+	string ppm_filename;  // Empty means no PPM file is written.
+};
+stack<GLuint> spare_pbos;
+deque<ReadInProgress> reads_in_progress;
+
string read_file(const string &filename)
{
FILE *fp = fopen(filename.c_str(), "r");
GLuint motion_search_program;
GLuint motion_search_vao;
- GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_prev_level_size;
+ GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_flow_size, uniform_inv_prev_level_size;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
};
uniform_image_size = glGetUniformLocation(motion_search_program, "image_size");
uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
+ uniform_inv_flow_size = glGetUniformLocation(motion_search_program, "inv_flow_size");
uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex");
bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
- bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, nearest_sampler);
+ bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
+ glProgramUniform2f(motion_search_program, uniform_inv_flow_size, 1.0f / width_patches, 1.0f / height_patches);
glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
glViewport(0, 0, width_patches, height_patches);
bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
- bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, smoothness_sampler);
- bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, zero_border_sampler);
glProgramUniform1f(equations_program, uniform_delta, vr_delta);
glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
glUseProgram(sor_program);
bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
- bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
- bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
glViewport(0, 0, level_width, level_height);
pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
{
+ if (!enable_timing) {
+ return make_pair(0, 0);
+ }
+
GLuint queries[2];
glGenQueries(2, queries);
glQueryCounter(queries[0], GL_TIMESTAMP);
void end()
{
- if (!ended) {
+ if (enable_timing && !ended) {
glQueryCounter(query.second, GL_TIMESTAMP);
ended = true;
}
// The smoothness is sampled so that once we get to a smoothness involving
// a value outside the border, the diffusivity between the two becomes zero.
- glCreateSamplers(1, &smoothness_sampler);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+ // Similarly, gradients are zero outside the border, since the edge is taken
+ // to be constant.
+ glCreateSamplers(1, &zero_border_sampler);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+ glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
// Initial flow is zero, 1x1.
glCreateTextures(GL_TEXTURE_2D, 1, &initial_flow_tex);
// Add the differential flow found by the variational refinement to the base flow,
// giving the final flow estimate for this level.
// The output is in diff_flow_tex; we don't need to make a new texture.
- // You can comment out this part if you wish to test disabling of the variational refinement.
- {
+ //
+ // Disabling this doesn't save any time (although we could easily make it so that
+ // it is more efficient), but it helps debug the motion search.
+ if (enable_variational_refinement) {
ScopedTimer timer("Add differential flow", &varref_timer);
add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height);
}
assert(false);
}
+// Negates the vertical component of every flow vector, in place.
+// OpenGL's origin is bottom-left while .flo files use a top-left origin,
+// so dv changes sign when converting from one convention to the other.
+// dense_flow holds width * height interleaved (du, dv) pairs.
+void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
+{
+	const unsigned num_vectors = width * height;
+	for (unsigned idx = 0; idx != num_vectors; ++idx) {
+		float &dv = dense_flow[idx * 2 + 1];
+		dv = -dv;
+	}
+}
+
void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
{
FILE *flowfp = fopen(filename, "wb");
fwrite(&height, 4, 1, flowfp);
for (unsigned y = 0; y < height; ++y) {
int yy = height - y - 1;
- for (unsigned x = 0; x < unsigned(width); ++x) {
- float du = dense_flow[(yy * width + x) * 2 + 0];
- float dv = dense_flow[(yy * width + x) * 2 + 1];
-
- dv = -dv;
-
- fwrite(&du, 4, 1, flowfp);
- fwrite(&dv, 4, 1, flowfp);
- }
+ fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
}
fclose(flowfp);
}
float du = dense_flow[(yy * width + x) * 2 + 0];
float dv = dense_flow[(yy * width + x) * 2 + 1];
- dv = -dv;
-
uint8_t r, g, b;
flow2rgb(du, dv, &r, &g, &b);
putc(r, fp);
fclose(fp);
}
+// Completes the oldest pending readback: copies the flow out of its PBO,
+// returns the PBO to the spare pool, flips the flow to a top-left coordinate
+// system and writes whichever output files were requested for that read.
+//
+// width and height are the dimensions of the flow, in pixels. At least one
+// read must be pending. Exits the program if the PBO cannot be mapped.
+void finish_one_read(GLuint width, GLuint height)
+{
+	assert(!reads_in_progress.empty());
+	ReadInProgress read = reads_in_progress.front();
+	reads_in_progress.pop_front();
+
+	const size_t num_floats = width * height * 2;
+	const size_t num_bytes = num_floats * sizeof(float);
+	unique_ptr<float[]> flow(new float[num_floats]);
+	void *buf = glMapNamedBufferRange(read.pbo, 0, num_bytes, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
+	if (buf == nullptr) {
+		// A failed mapping yields a null pointer; copying from it would crash.
+		fprintf(stderr, "Could not map readback buffer for %s %s\n", read.filename0.c_str(), read.filename1.c_str());
+		exit(1);
+	}
+	memcpy(flow.get(), buf, num_bytes);
+	glUnmapNamedBuffer(read.pbo);
+	spare_pbos.push(read.pbo);
+
+	flip_coordinate_system(flow.get(), width, height);
+	if (!read.flow_filename.empty()) {
+		write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+		fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+	}
+	if (!read.ppm_filename.empty()) {
+		write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+	}
+}
+
+// Starts an asynchronous readback of tex into a spare PBO and queues a
+// ReadInProgress entry so that finish_one_read() can write the result later.
+// If no PBO is free, the oldest pending read is finished first to release one.
+// Pass an empty string as flow_filename or ppm_filename to skip that output.
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+	if (spare_pbos.empty()) {
+		finish_one_read(width, height);
+	}
+	assert(!spare_pbos.empty());
+
+	// Take a PBO out of the pool and record what it is going to hold.
+	const GLuint pbo = spare_pbos.top();
+	spare_pbos.pop();
+	reads_in_progress.push_back(ReadInProgress{ pbo, filename0, filename1, flow_filename, ppm_filename });
+
+	// With a pack buffer bound, the destination argument is an offset into
+	// that buffer instead of a client memory pointer.
+	const size_t num_bytes = width * height * 2 * sizeof(float);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
+	glGetTextureImage(tex, 0, GL_RG, GL_FLOAT, num_bytes, nullptr);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
int main(int argc, char **argv)
{
static const option long_options[] = {
{ "alpha", required_argument, 0, 'a' },
{ "delta", required_argument, 0, 'd' },
- { "gamma", required_argument, 0, 'g' }
+ { "gamma", required_argument, 0, 'g' },
+ { "disable-timing", no_argument, 0, 1000 },
+ { "ignore-variational-refinement", no_argument, 0, 1001 }, // Still calculates it, just doesn't apply it.
+ { 0, 0, 0, 0 } // Option tables for getopt_long() must end with an all-zero entry.
};
for ( ;; ) {
case 'g':
vr_gamma = atof(optarg);
break;
+ case 1000:
+ enable_timing = false;
+ break;
+ case 1001:
+ enable_variational_refinement = false;
+ break;
default:
fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
exit(1);
SDL_GLContext context = SDL_GL_CreateContext(window);
assert(context != nullptr);
+ const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+ const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+ const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
+
// Load pictures.
unsigned width1, height1, width2, height2;
- GLuint tex0 = load_texture(argc >= (optind + 1) ? argv[optind] : "test1499.png", &width1, &height1);
- GLuint tex1 = load_texture(argc >= (optind + 2) ? argv[optind + 1] : "test1500.png", &width2, &height2);
+ GLuint tex0 = load_texture(filename0, &width1, &height1);
+ GLuint tex1 = load_texture(filename1, &width2, &height2);
if (width1 != width2 || height1 != height2) {
fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
exit(1);
}
+ // Set up some PBOs to do asynchronous readback.
+ GLuint pbos[5];
+ glCreateBuffers(5, pbos);
+ for (int i = 0; i < 5; ++i) {
+ glNamedBufferData(pbos[i], width1 * height1 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+ spare_pbos.push(pbos[i]);
+ }
+
// FIXME: Should be part of DISComputeFlow (but needs to be initialized
// before all the render passes).
float vertices[] = {
DISComputeFlow compute_flow(width1, height1);
GLuint final_tex = compute_flow.exec(tex0, tex1);
- unique_ptr<float[]> dense_flow(new float[width1 * height1 * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width1 * height1 * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
compute_flow.release_texture(final_tex);
- write_flow(argc >= (optind + 3) ? argv[optind + 2] : "flow.flo", dense_flow.get(), width1, height1);
- write_ppm("flow.ppm", dense_flow.get(), width1, height1);
-
- dense_flow.reset();
-
// See if there are more flows on the command line (ie., more than three arguments),
// and if so, process them.
int num_flows = (argc - optind) / 3;
const char *filename0 = argv[optind + i * 3 + 0];
const char *filename1 = argv[optind + i * 3 + 1];
const char *flow_filename = argv[optind + i * 3 + 2];
- fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
-
GLuint width, height;
GLuint tex0 = load_texture(filename0, &width, &height);
if (width != width1 || height != height1) {
}
GLuint final_tex = compute_flow.exec(tex0, tex1);
-
- unique_ptr<float[]> dense_flow(new float[width * height * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
compute_flow.release_texture(final_tex);
-
- write_flow(flow_filename, dense_flow.get(), width, height);
}
- fprintf(stderr, "err = %d\n", glGetError());
+ while (!reads_in_progress.empty()) {
+ finish_one_read(width1, height1);
+ }
}