#include "util.h"
#include <algorithm>
+#include <deque>
#include <memory>
#include <map>
+#include <stack>
#include <vector>
#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
// Weighting constants for the different parts of the variational refinement.
// These don't correspond 1:1 to the values given in the DIS paper,
// since we have different normalizations and ranges in some cases.
-float vr_gamma = 10.0f, vr_delta = 5.0f, vr_alpha = 10.0f;
+// These are found through a simple grid search on some MPI-Sintel data,
+// although the error (EPE) seems to be fairly insensitive to the precise values.
+// Only the relative values matter, so we fix alpha (the smoothness constant)
+// at unity and tweak the others.
+float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
bool enable_timing = true;
+bool enable_variational_refinement = true; // Just for debugging.
// Some global OpenGL objects.
// TODO: These should really be part of DISComputeFlow.
-GLuint nearest_sampler, linear_sampler, smoothness_sampler;
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
GLuint vertex_vbo;
+// Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
+//
+// One ReadInProgress describes a single readback that has been scheduled
+// but whose output files have not been written yet.
+// NOTE: schedule_read() fills this in by aggregate initialization, so the
+// order of the members must be kept as-is.
+struct ReadInProgress {
+	GLuint pbo;            // Buffer object the flow is read back into.
+	string filename0;      // First input image; only used for the log line.
+	string filename1;      // Second input image; only used for the log line.
+	string flow_filename;  // Flow output file. May be empty for no write.
+	string ppm_filename;   // PPM output file. May be empty for no write.
+};
+stack<GLuint> spare_pbos;
+deque<ReadInProgress> reads_in_progress;
+
string read_file(const string &filename)
{
FILE *fp = fopen(filename.c_str(), "r");
GLuint sobel_program;
GLuint sobel_vao;
- GLuint uniform_tex, uniform_image_size;
+ GLuint uniform_tex;
};
Sobel::Sobel()
void Sobel::exec(GLint tex0_view, GLint grad0_tex, int level_width, int level_height)
{
glUseProgram(sobel_program);
- glBindTextureUnit(0, tex0_view);
- glBindSampler(0, nearest_sampler);
- glProgramUniform1i(sobel_program, uniform_tex, 0);
+ bind_sampler(sobel_program, uniform_tex, 0, tex0_view, nearest_sampler);
glViewport(0, 0, level_width, level_height);
fbos.render_to(grad0_tex);
GLuint motion_search_program;
GLuint motion_search_vao;
- GLuint uniform_image_size, uniform_inv_image_size, uniform_inv_prev_level_size;
+ GLuint uniform_inv_image_size, uniform_inv_prev_level_size;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
};
glEnableVertexArrayAttrib(motion_search_vao, position_attrib);
glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
- uniform_image_size = glGetUniformLocation(motion_search_program, "image_size");
uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
- bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, nearest_sampler);
+ bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
- glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
GLuint densify_program;
GLuint densify_vao;
- GLuint uniform_width_patches, uniform_patch_size, uniform_patch_spacing;
+ GLuint uniform_patch_size, uniform_patch_spacing;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
};
glEnableVertexArrayAttrib(densify_vao, position_attrib);
glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
- uniform_width_patches = glGetUniformLocation(densify_program, "width_patches");
uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
uniform_patch_spacing = glGetUniformLocation(densify_program, "patch_spacing");
uniform_image0_tex = glGetUniformLocation(densify_program, "image0_tex");
bind_sampler(densify_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
bind_sampler(densify_program, uniform_flow_tex, 2, flow_tex, nearest_sampler);
- glProgramUniform1i(densify_program, uniform_width_patches, width_patches);
glProgramUniform2f(densify_program, uniform_patch_size,
float(patch_size_pixels) / level_width,
float(patch_size_pixels) / level_height);
GLuint prewarp_vao;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
- GLuint uniform_image_size;
};
Prewarp::Prewarp()
uniform_image0_tex = glGetUniformLocation(prewarp_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(prewarp_program, "image1_tex");
uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
-
- uniform_image_size = glGetUniformLocation(prewarp_program, "image_size");
}
void Prewarp::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height)
bind_sampler(prewarp_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
bind_sampler(prewarp_program, uniform_flow_tex, 2, flow_tex, nearest_sampler);
- glProgramUniform2f(prewarp_program, uniform_image_size, level_width, level_height);
-
glViewport(0, 0, level_width, level_height);
glDisable(GL_BLEND);
glBindVertexArray(prewarp_vao);
bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
- bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, smoothness_sampler);
- bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, zero_border_sampler);
glProgramUniform1f(equations_program, uniform_delta, vr_delta);
glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
GLuint uniform_diff_flow_tex;
GLuint uniform_equation_tex;
GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+ GLuint uniform_phase;
};
SOR::SOR()
{
- sor_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+ sor_vs_obj = compile_shader(read_file("sor.vert"), GL_VERTEX_SHADER);
sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER);
sor_program = link_program(sor_vs_obj, sor_fs_obj);
uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex");
uniform_smoothness_x_tex = glGetUniformLocation(sor_program, "smoothness_x_tex");
uniform_smoothness_y_tex = glGetUniformLocation(sor_program, "smoothness_y_tex");
+ uniform_phase = glGetUniformLocation(sor_program, "phase");
}
void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
glUseProgram(sor_program);
bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
- bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
- bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+ bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, zero_border_sampler);
+ bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
+ // NOTE: We bind to the texture we are rendering from, but we never write any value
+ // that we read in the same shader pass (we call discard for red values when we compute
+ // black, and vice versa), and we have barriers between the passes, so we're fine
+ // as per the spec.
glViewport(0, 0, level_width, level_height);
glDisable(GL_BLEND);
glBindVertexArray(sor_vao);
- fbos.render_to(diff_flow_tex); // NOTE: Bind to same as we render from!
+ fbos.render_to(diff_flow_tex);
for (int i = 0; i < num_iterations; ++i) {
+ glProgramUniform1i(sor_program, uniform_phase, 0);
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+ glTextureBarrier();
+ glProgramUniform1i(sor_program, uniform_phase, 1);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (i != num_iterations - 1) {
glTextureBarrier();
// The smoothness is sampled so that once we get to a smoothness involving
// a value outside the border, the diffusivity between the two becomes zero.
- glCreateSamplers(1, &smoothness_sampler);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
- glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+ // Similarly, gradients are zero outside the border, since the edge is taken
+ // to be constant.
+ glCreateSamplers(1, &zero_border_sampler);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+ glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+ glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
// Initial flow is zero, 1x1.
glCreateTextures(GL_TEXTURE_2D, 1, &initial_flow_tex);
int level_width = width >> level;
int level_height = height >> level;
float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
- int width_patches = 1 + lrintf((level_width - patch_size_pixels) / patch_spacing_pixels);
- int height_patches = 1 + lrintf((level_height - patch_size_pixels) / patch_spacing_pixels);
+
+ // Make sure we have patches at least every Nth pixel, e.g. for width=9
+ // and patch_spacing=3 (the default), we put out patch centers in
+ // x=0, x=3, x=6, x=9, which is four patches. The fragment shader will
+ // lock all the centers to integer coordinates if needed.
+ int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
+ int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
// Make sure we always read from the correct level; the chosen
// mipmapping could otherwise be rather unpredictable, especially
// Add the differential flow found by the variational refinement to the base flow,
// giving the final flow estimate for this level.
// The output is in diff_flow_tex; we don't need to make a new texture.
- // You can comment out this part if you wish to test disabling of the variational refinement.
- {
+ //
+ // Disabling this doesn't save any time (although we could easily make it so that
+ // it is more efficient), but it helps debug the motion search.
+ if (enable_variational_refinement) {
ScopedTimer timer("Add differential flow", &varref_timer);
add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height);
}
assert(false);
}
+// OpenGL uses a bottom-left coordinate system, .flo files use a top-left coordinate system.
+//
+// dense_flow holds width * height pixels of two floats each; the second float
+// of every pixel is negated in place. The rows themselves are not reordered here.
+void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
+{
+	const unsigned num_pixels = width * height;
+	for (unsigned pixel = 0; pixel != num_pixels; ++pixel) {
+		float &second_component = dense_flow[pixel * 2 + 1];
+		second_component = -second_component;
+	}
+}
+
void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
{
FILE *flowfp = fopen(filename, "wb");
fwrite(&height, 4, 1, flowfp);
for (unsigned y = 0; y < height; ++y) {
int yy = height - y - 1;
- for (unsigned x = 0; x < unsigned(width); ++x) {
- float du = dense_flow[(yy * width + x) * 2 + 0];
- float dv = dense_flow[(yy * width + x) * 2 + 1];
-
- dv = -dv;
-
- fwrite(&du, 4, 1, flowfp);
- fwrite(&dv, 4, 1, flowfp);
- }
+ fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
}
fclose(flowfp);
}
float du = dense_flow[(yy * width + x) * 2 + 0];
float dv = dense_flow[(yy * width + x) * 2 + 1];
- dv = -dv;
-
uint8_t r, g, b;
flow2rgb(du, dv, &r, &g, &b);
putc(r, fp);
fclose(fp);
}
+// Finishes the oldest scheduled readback: maps its PBO, copies the flow out,
+// returns the PBO to spare_pbos and writes whichever output files were requested.
+//
+// width and height are the dimensions (in pixels) of the flow in the PBO;
+// the buffer is read as width * height pixels of two floats each.
+// reads_in_progress must not be empty. Exits the program if the PBO cannot
+// be mapped.
+void finish_one_read(GLuint width, GLuint height)
+{
+	assert(!reads_in_progress.empty());
+	ReadInProgress read = reads_in_progress.front();
+	reads_in_progress.pop_front();
+
+	// Do the size arithmetic once, in size_t, so that it cannot wrap in 32 bits.
+	const size_t num_floats = size_t(width) * height * 2;
+	const size_t num_bytes = num_floats * sizeof(float);
+
+	unique_ptr<float[]> flow(new float[num_floats]);
+	void *buf = glMapNamedBufferRange(read.pbo, 0, num_bytes, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
+	if (buf == nullptr) {
+		// Mapping returns NULL on error; copying from it would crash.
+		fprintf(stderr, "Could not map readback buffer for %s %s (GL error 0x%x)\n",
+			read.filename0.c_str(), read.filename1.c_str(), glGetError());
+		exit(1);
+	}
+	// Copy out of the mapping, since it is mapped read-only and we need to
+	// modify the data (flip_coordinate_system() works in place).
+	memcpy(flow.get(), buf, num_bytes);
+	glUnmapNamedBuffer(read.pbo);
+	spare_pbos.push(read.pbo);
+
+	flip_coordinate_system(flow.get(), width, height);
+	if (!read.flow_filename.empty()) {
+		write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+		fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+	}
+	if (!read.ppm_filename.empty()) {
+		write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+	}
+}
+
+
+// Starts a readback of level 0 of tex (as GL_RG / GL_FLOAT) into a spare PBO
+// and queues it on reads_in_progress; finish_one_read() writes the files later.
+//
+// filename0 and filename1 are only stored for the log line. flow_filename and
+// ppm_filename are the outputs; an empty string means that file is not written.
+// All four are converted to string, so none of them may be null.
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+	// No PBO free; finish the oldest outstanding read to get its PBO back.
+	if (spare_pbos.empty()) {
+		finish_one_read(width, height);
+	}
+	assert(!spare_pbos.empty());
+
+	const GLuint pbo = spare_pbos.top();
+	spare_pbos.pop();
+	reads_in_progress.push_back(ReadInProgress{ pbo, filename0, filename1, flow_filename, ppm_filename });
+
+	// With a buffer bound to GL_PIXEL_PACK_BUFFER, the last argument is an
+	// offset into that buffer, so nullptr means "write from the start of the PBO".
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
+	glGetTextureImage(tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), nullptr);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
int main(int argc, char **argv)
{
// Long command-line options. Options with a character value share their
// handling with the short option of the same letter; 1000 and up are long-only.
static const option long_options[] = {
- { "alpha", required_argument, 0, 'a' },
- { "delta", required_argument, 0, 'd' },
- { "gamma", required_argument, 0, 'g' },
- { "disable-timing", no_argument, 0, 1000 }
+ { "smoothness-relative-weight", required_argument, 0, 's' }, // alpha.
+ { "intensity-relative-weight", required_argument, 0, 'i' }, // delta.
+ { "gradient-relative-weight", required_argument, 0, 'g' }, // gamma.
+ { "disable-timing", no_argument, 0, 1000 },
+ { "ignore-variational-refinement", no_argument, 0, 1001 }, // Still calculates it, just doesn't apply it.
+ { 0, 0, 0, 0 } // Terminator: getopt_long() requires the last element to be all zeros.
};
for ( ;; ) {
int option_index = 0;
- int c = getopt_long(argc, argv, "a:d:g:", long_options, &option_index);
+ int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
if (c == -1) {
break;
}
switch (c) {
- case 'a':
+ case 's':
vr_alpha = atof(optarg);
break;
- case 'd':
+ case 'i':
vr_delta = atof(optarg);
break;
case 'g':
case 1000:
enable_timing = false;
break;
+ case 1001:
+ enable_variational_refinement = false;
+ break;
default:
fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
exit(1);
SDL_GLContext context = SDL_GL_CreateContext(window);
assert(context != nullptr);
+ const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+ const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+ const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
+
// Load pictures.
unsigned width1, height1, width2, height2;
- GLuint tex0 = load_texture(argc >= (optind + 1) ? argv[optind] : "test1499.png", &width1, &height1);
- GLuint tex1 = load_texture(argc >= (optind + 2) ? argv[optind + 1] : "test1500.png", &width2, &height2);
+ GLuint tex0 = load_texture(filename0, &width1, &height1);
+ GLuint tex1 = load_texture(filename1, &width2, &height2);
if (width1 != width2 || height1 != height2) {
fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
exit(1);
}
+ // Set up some PBOs to do asynchronous readback.
+ GLuint pbos[5];
+ glCreateBuffers(5, pbos);
+ for (int i = 0; i < 5; ++i) {
+ glNamedBufferData(pbos[i], width1 * height1 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+ spare_pbos.push(pbos[i]);
+ }
+
// FIXME: Should be part of DISComputeFlow (but needs to be initialized
// before all the render passes).
float vertices[] = {
DISComputeFlow compute_flow(width1, height1);
GLuint final_tex = compute_flow.exec(tex0, tex1);
- unique_ptr<float[]> dense_flow(new float[width1 * height1 * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width1 * height1 * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
compute_flow.release_texture(final_tex);
- write_flow(argc >= (optind + 3) ? argv[optind + 2] : "flow.flo", dense_flow.get(), width1, height1);
- write_ppm("flow.ppm", dense_flow.get(), width1, height1);
-
- dense_flow.reset();
-
// See if there are more flows on the command line (i.e., more than three arguments),
// and if so, process them.
int num_flows = (argc - optind) / 3;
const char *filename0 = argv[optind + i * 3 + 0];
const char *filename1 = argv[optind + i * 3 + 1];
const char *flow_filename = argv[optind + i * 3 + 2];
- fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
-
GLuint width, height;
GLuint tex0 = load_texture(filename0, &width, &height);
if (width != width1 || height != height1) {
}
GLuint final_tex = compute_flow.exec(tex0, tex1);
-
- unique_ptr<float[]> dense_flow(new float[width * height * 2]);
- glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), dense_flow.get());
-
+ schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
compute_flow.release_texture(final_tex);
-
- write_flow(flow_filename, dense_flow.get(), width, height);
}
- fprintf(stderr, "err = %d\n", glGetError());
+ while (!reads_in_progress.empty()) {
+ finish_one_read(width1, height1);
+ }
}