]> git.sesse.net Git - nageru/blobdiff - flow.cpp
Give the variational refinement terms slightly less mysterious names.
[nageru] / flow.cpp
index df6ac6df625d96586049234fc9c0e9094eca9017..b6f0517ea746a3698cecee814b10ab9ea3b37dcf 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
 #include "util.h"
 
 #include <algorithm>
+#include <deque>
 #include <memory>
 #include <map>
+#include <stack>
 #include <vector>
 
 #define BUFFER_OFFSET(i) ((char *)nullptr + (i))
@@ -42,9 +44,18 @@ bool enable_variational_refinement = true;  // Just for debugging.
 
 // Some global OpenGL objects.
 // TODO: These should really be part of DISComputeFlow.
-GLuint nearest_sampler, linear_sampler, smoothness_sampler;
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
 GLuint vertex_vbo;
 
+// Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
+// One ReadInProgress describes a single readback that has been scheduled but not yet written out.
+struct ReadInProgress {
+       // Pixel buffer object that receives the flow data.
+       GLuint pbo;
+
+       // Names of the two input images; printed in the progress message.
+       string filename0;
+       string filename1;
+
+       // Output files. Either may be empty for no write.
+       string flow_filename;
+       string ppm_filename;
+};
+stack<GLuint> spare_pbos;  // PBOs that are free to receive a new readback.
+deque<ReadInProgress> reads_in_progress;  // Scheduled readbacks, oldest at the front; finished in FIFO order.
+
 string read_file(const string &filename)
 {
        FILE *fp = fopen(filename.c_str(), "r");
@@ -388,7 +399,7 @@ void MotionSearch::exec(GLuint tex0_view, GLuint tex1_view, GLuint grad0_tex, GL
 
        bind_sampler(motion_search_program, uniform_image0_tex, 0, tex0_view, nearest_sampler);
        bind_sampler(motion_search_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
-       bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, nearest_sampler);
+       bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
        bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
 
        glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
@@ -731,8 +742,8 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex
        bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
        bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
        bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
-       bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, smoothness_sampler);
-       bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, smoothness_sampler);
+       bind_sampler(equations_program, uniform_smoothness_x_tex, 5, smoothness_x_tex, zero_border_sampler);
+       bind_sampler(equations_program, uniform_smoothness_y_tex, 6, smoothness_y_tex, zero_border_sampler);
        glProgramUniform1f(equations_program, uniform_delta, vr_delta);
        glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
 
@@ -791,8 +802,8 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_te
        glUseProgram(sor_program);
 
        bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
-       bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, smoothness_sampler);
-       bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, smoothness_sampler);
+       bind_sampler(sor_program, uniform_smoothness_x_tex, 1, smoothness_x_tex, zero_border_sampler);
+       bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
        bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
 
        glViewport(0, 0, level_width, level_height);
@@ -1050,13 +1061,15 @@ DISComputeFlow::DISComputeFlow(int width, int height)
 
        // The smoothness is sampled so that once we get to a smoothness involving
        // a value outside the border, the diffusivity between the two becomes zero.
-       glCreateSamplers(1, &smoothness_sampler);
-       glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-       glSamplerParameteri(smoothness_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-       glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
-       glSamplerParameteri(smoothness_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+       // Similarly, gradients are zero outside the border, since the edge is taken
+       // to be constant.
+       glCreateSamplers(1, &zero_border_sampler);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
        float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };
-       glSamplerParameterfv(smoothness_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+       glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
 
        // Initial flow is zero, 1x1.
        glCreateTextures(GL_TEXTURE_2D, 1, &initial_flow_tex);
@@ -1315,28 +1328,63 @@ void write_ppm(const char *filename, const float *dense_flow, unsigned width, un
        fclose(fp);
 }
 
+void finish_one_read(GLuint width, GLuint height)
+{
+       assert(!reads_in_progress.empty());
+       ReadInProgress read = reads_in_progress.front();
+       reads_in_progress.pop_front();
+
+       unique_ptr<float[]> flow(new float[width * height * 2]);
+       void *buf = glMapNamedBufferRange(read.pbo, 0, width * height * 2 * sizeof(float), GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
+       memcpy(flow.get(), buf, width * height * 2 * sizeof(float));
+       glUnmapNamedBuffer(read.pbo);
+       spare_pbos.push(read.pbo);
+
+       flip_coordinate_system(flow.get(), width, height);
+       if (!read.flow_filename.empty()) {
+               write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+               fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+       }
+       if (!read.ppm_filename.empty()) {
+               write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+       }
+}
+
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+       if (spare_pbos.empty()) {
+               finish_one_read(width, height);
+       }
+       assert(!spare_pbos.empty());
+       reads_in_progress.emplace_back(ReadInProgress{ spare_pbos.top(), filename0, filename1, flow_filename, ppm_filename });
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, spare_pbos.top());
+       spare_pbos.pop();
+       glGetTextureImage(tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), nullptr);
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
 int main(int argc, char **argv)
 {
         static const option long_options[] = {
-                { "alpha", required_argument, 0, 'a' },
-                { "delta", required_argument, 0, 'd' },
-                { "gamma", required_argument, 0, 'g' },
+               { "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
+               { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
+               { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
                { "disable-timing", no_argument, 0, 1000 },
                { "ignore-variational-refinement", no_argument, 0, 1001 }  // Still calculates it, just doesn't apply it.
        };
 
        for ( ;; ) {
                int option_index = 0;
-               int c = getopt_long(argc, argv, "a:d:g:", long_options, &option_index);
+               int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
 
                if (c == -1) {
                        break;
                }
                switch (c) {
-               case 'a':
+               case 's':
                        vr_alpha = atof(optarg);
                        break;
-               case 'd':
+               case 'i':
                        vr_delta = atof(optarg);
                        break;
                case 'g':
@@ -1378,7 +1426,6 @@ int main(int argc, char **argv)
        const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
        const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
        const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
-       fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
 
        // Load pictures.
        unsigned width1, height1, width2, height2;
@@ -1391,6 +1438,14 @@ int main(int argc, char **argv)
                exit(1);
        }
 
+       // Set up some PBOs to do asynchronous readback.
+       GLuint pbos[5];
+        glCreateBuffers(5, pbos);
+       for (int i = 0; i < 5; ++i) {
+               glNamedBufferData(pbos[i], width1 * height1 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+               spare_pbos.push(pbos[i]);
+       }
+
        // FIXME: Should be part of DISComputeFlow (but needs to be initialized
        // before all the render passes).
        float vertices[] = {
@@ -1406,17 +1461,9 @@ int main(int argc, char **argv)
        DISComputeFlow compute_flow(width1, height1);
        GLuint final_tex = compute_flow.exec(tex0, tex1);
 
-       unique_ptr<float[]> dense_flow(new float[width1 * height1 * 2]);
-       glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width1 * height1 * 2 * sizeof(float), dense_flow.get());
-
+       schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
        compute_flow.release_texture(final_tex);
 
-       flip_coordinate_system(dense_flow.get(), width1, height1);
-       write_flow(flow_filename, dense_flow.get(), width1, height1);
-       write_ppm("flow.ppm", dense_flow.get(), width1, height1);
-
-       dense_flow.reset();
-
        // See if there are more flows on the command line (i.e., more than three arguments),
        // and if so, process them.
        int num_flows = (argc - optind) / 3;
@@ -1424,8 +1471,6 @@ int main(int argc, char **argv)
                const char *filename0 = argv[optind + i * 3 + 0];
                const char *filename1 = argv[optind + i * 3 + 1];
                const char *flow_filename = argv[optind + i * 3 + 2];
-               fprintf(stderr, "%s %s -> %s\n", filename0, filename1, flow_filename);
-
                GLuint width, height;
                GLuint tex0 = load_texture(filename0, &width, &height);
                if (width != width1 || height != height1) {
@@ -1442,15 +1487,11 @@ int main(int argc, char **argv)
                }
 
                GLuint final_tex = compute_flow.exec(tex0, tex1);
-
-               unique_ptr<float[]> dense_flow(new float[width * height * 2]);
-               glGetTextureImage(final_tex, 0, GL_RG, GL_FLOAT, width * height * 2 * sizeof(float), dense_flow.get());
-
+               schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
                compute_flow.release_texture(final_tex);
-
-               flip_coordinate_system(dense_flow.get(), width, height);
-               write_flow(flow_filename, dense_flow.get(), width, height);
        }
 
-       fprintf(stderr, "err = %d\n", glGetError());
+       while (!reads_in_progress.empty()) {
+               finish_one_read(width1, height1);
+       }
 }