git.sesse.net Git - nageru/blobdiff - flow.cpp
Do RGB -> grayscale conversion on the GPU.
[nageru] / flow.cpp
index 4d520eb8fc5dca1acd9a515682e5563065d1fc8e..6bed6d7c9c659713760e62867b67500725532b87 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -60,6 +60,17 @@ struct ReadInProgress {
 stack<GLuint> spare_pbos;
 deque<ReadInProgress> reads_in_progress;
 
+int find_num_levels(int width, int height)  // Returns 1 plus the number of times both dimensions get halved (right-shifted) before neither exceeds 1; main() passes the result to glTextureStorage2D as the level count.
+{
+       int levels = 1;  // The full-size level is always counted.
+       for (int w = width, h = height; w > 1 || h > 1; ) {  // Keep going while either dimension is still above 1, so the larger one decides the count.
+               w >>= 1;
+               h >>= 1;
+               ++levels;
+       }
+       return levels;
+}
+
 string read_file(const string &filename)
 {
        FILE *fp = fopen(filename.c_str(), "r");
@@ -147,8 +158,8 @@ GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_
        }
 
        // For whatever reason, SDL doesn't support converting to YUV surfaces
-       // nor grayscale, so we'll do it (slowly) ourselves.
-       SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA8888, /*flags=*/0);
+       // nor grayscale, so we'll do it ourselves.
+       SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
        if (rgb_surf == nullptr) {
                fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
                exit(1);
@@ -158,34 +169,19 @@ GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_
 
        unsigned width = rgb_surf->w, height = rgb_surf->h;
        const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
-       unique_ptr<uint8_t[]> pix(new uint8_t[width * height]);
+       unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
 
        // Extract the Y component, and convert to bottom-left origin.
        for (unsigned y = 0; y < height; ++y) {
                unsigned y2 = height - 1 - y;
-               for (unsigned x = 0; x < width; ++x) {
-                       uint8_t r = sptr[(y2 * width + x) * 4 + 3];
-                       uint8_t g = sptr[(y2 * width + x) * 4 + 2];
-                       uint8_t b = sptr[(y2 * width + x) * 4 + 1];
-
-                       // Rec. 709.
-                       pix[y * width + x] = lrintf(r * 0.2126f + g * 0.7152f + b * 0.0722f);
-               }
+               memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
        }
        SDL_FreeSurface(rgb_surf);
 
-       int levels = 1;
-       for (int w = width, h = height; w > 1 || h > 1; ) {
-               w >>= 1;
-               h >>= 1;
-               ++levels;
-       }
-
        GLuint tex;
        glCreateTextures(GL_TEXTURE_2D, 1, &tex);
-       glTextureStorage2D(tex, levels, GL_R8, width, height);
-       glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_BYTE, pix.get());
-       glGenerateTextureMipmap(tex);
+       glTextureStorage2D(tex, 1, GL_RGBA8, width, height);
+       glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
 
        *width_ret = width;
        *height_ret = height;
@@ -299,6 +295,52 @@ void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements>
        glBindFramebuffer(GL_FRAMEBUFFER, fbo);
 }
 
+// Convert RGB to grayscale, using Rec. 709 coefficients.
+class GrayscaleConversion {  // One render pass: holds the shader program, VAO and FBO set used to draw an input texture into a gray output texture.
+public:
+       GrayscaleConversion();  // Compiles vs.vert and gray.frag, links them, and sets up the VAO.
+       void exec(GLint tex, GLint gray_tex, int width, int height);  // Draws tex into gray_tex with a width x height viewport. NOTE(review): texture names are GLuint elsewhere in this file; GLint here looks unintentional -- confirm.
+
+private:
+       PersistentFBOSet<1> fbos;  // Used by exec() via render_to() to bind the output framebuffer.
+       GLuint gray_vs_obj;  // Vertex shader compiled from vs.vert.
+       GLuint gray_fs_obj;  // Fragment shader compiled from gray.frag.
+       GLuint gray_program;  // Program linked from the two shader objects above.
+       GLuint gray_vao;  // Vertex array object carrying the "position" attribute setup.
+
+       GLuint uniform_tex;  // Location of the "tex" uniform in gray_program.
+};
+
+GrayscaleConversion::GrayscaleConversion()  // Builds the shader program and vertex state up front, so exec() only has to bind and draw.
+{
+       gray_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+       gray_fs_obj = compile_shader(read_file("gray.frag"), GL_FRAGMENT_SHADER);
+       gray_program = link_program(gray_vs_obj, gray_fs_obj);
+
+       // Set up the VAO containing all the required position/texcoord data.
+       glCreateVertexArrays(1, &gray_vao);
+       glBindVertexArray(gray_vao);
+
+       GLint position_attrib = glGetAttribLocation(gray_program, "position");  // NOTE(review): this is -1 when "position" is not an active attribute; the value is not checked before use.
+       glEnableVertexArrayAttrib(gray_vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));  // Two tightly packed floats per vertex, sourced from whichever buffer is bound to GL_ARRAY_BUFFER when this runs (main() binds vertex_vbo before constructing this object).
+
+       uniform_tex = glGetUniformLocation(gray_program, "tex");  // NOTE(review): glGetUniformLocation returns a GLint (-1 if not found) but the member is a GLuint -- confirm this is intended.
+}
+
+void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height)  // Runs gray_program with tex as input and gray_tex as the render target, over a width x height viewport.
+{
+       glUseProgram(gray_program);
+       bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);  // Attaches tex to the "tex" uniform with nearest_sampler; the 0 is presumably the texture unit -- confirm against bind_sampler().
+
+       glViewport(0, 0, width, height);
+       fbos.render_to(gray_tex);  // Binds a framebuffer for gray_tex, so the draw below writes into it.
+       glBindVertexArray(gray_vao);
+       glUseProgram(gray_program);  // NOTE(review): repeats the glUseProgram() call at the top of this function; looks redundant.
+       glDisable(GL_BLEND);
+       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);  // Draws four vertices as a triangle strip.
+}
+
 // Compute gradients in every point, used for the motion search.
 // The DIS paper doesn't actually mention how these are computed,
 // but seemingly, a 3x3 Sobel operator is used here (at least in
@@ -1008,6 +1050,21 @@ private:
        bool ended = false;
 };
 
+class TexturePool {  // Tracks a list of textures by format and size, handing them out through get_texture() and taking them back through release_texture().
+public:
+       GLuint get_texture(GLenum format, GLuint width, GLuint height);  // Returns a texture name for the requested format and size; its search considers entries that are not in_use and have a matching format.
+       void release_texture(GLuint tex_num);  // Looks up the entry whose tex_num matches; presumably marks it as no longer in use -- confirm in the definition.
+
+private:
+       struct Texture {  // Bookkeeping record for one pooled texture.
+               GLuint tex_num;  // Texture name, as returned by get_texture().
+               GLenum format;  // Format the texture was requested with.
+               GLuint width, height;  // Dimensions the texture was requested with.
+               bool in_use = false;  // Entries start out not in use; get_texture() skips entries where this is true.
+       };
+       vector<Texture> textures;  // All entries known to the pool.
+};
+
 class DISComputeFlow {
 public:
        DISComputeFlow(int width, int height);
@@ -1015,11 +1072,15 @@ public:
        // Returns a texture that must be released with release_texture()
        // after use.
        GLuint exec(GLuint tex0, GLuint tex1);
-       void release_texture(GLuint tex);
+
+       void release_texture(GLuint tex) {  // Forwards to the pool member, so callers can still release exec() results through DISComputeFlow.
+               pool.release_texture(tex);
+       }
 
 private:
        int width, height;
        GLuint initial_flow_tex;
+       TexturePool pool;
 
        // The various passes.
        Sobel sobel;
@@ -1032,16 +1093,6 @@ private:
        SOR sor;
        AddBaseFlow add_base_flow;
        ResizeFlow resize_flow;
-
-       struct Texture {
-               GLuint tex_num;
-               GLenum format;
-               GLuint width, height;
-               bool in_use = false;
-       };
-       vector<Texture> textures;
-
-       GLuint get_texture(GLenum format, GLuint width, GLuint height);
 };
 
 DISComputeFlow::DISComputeFlow(int width, int height)
@@ -1080,10 +1131,6 @@ DISComputeFlow::DISComputeFlow(int width, int height)
 
 GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
 {
-       for (const Texture &tex : textures) {
-               assert(!tex.in_use);
-       }
-
        int prev_level_width = 1, prev_level_height = 1;
        GLuint prev_level_flow_tex = initial_flow_tex;
 
@@ -1117,7 +1164,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
 
                // Create a new texture; we could be fancy and render use a multi-level
                // texture, but meh.
-               GLuint grad0_tex = get_texture(GL_RG16F, level_width, level_height);
+               GLuint grad0_tex = pool.get_texture(GL_RG16F, level_width, level_height);
 
                // Find the derivative.
                {
@@ -1129,19 +1176,19 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                // level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
 
                // Create an output flow texture.
-               GLuint flow_out_tex = get_texture(GL_RGB16F, width_patches, height_patches);
+               GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches);
 
                // And draw.
                {
                        ScopedTimer timer("Motion search", &level_timer);
                        motion_search.exec(tex0_view, tex1_view, grad0_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches);
                }
-               release_texture(grad0_tex);
+               pool.release_texture(grad0_tex);
 
                // Densification.
 
                // Set up an output texture (initially zero).
-               GLuint dense_flow_tex = get_texture(GL_RGB16F, level_width, level_height);
+               GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height);
                glClearTexImage(dense_flow_tex, 0, GL_RGB, GL_FLOAT, nullptr);
 
                // And draw.
@@ -1149,7 +1196,7 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                        ScopedTimer timer("Densification", &level_timer);
                        densify.exec(tex0_view, tex1_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches);
                }
-               release_texture(flow_out_tex);
+               pool.release_texture(flow_out_tex);
 
                // Everything below here in the loop belongs to variational refinement.
                ScopedTimer varref_timer("Variational refinement", &level_timer);
@@ -1161,14 +1208,14 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                // in pixels, not 0..1 normalized OpenGL texture coordinates.
                // This is because variational refinement depends so heavily on derivatives,
                // which are measured in intensity levels per pixel.
-               GLuint I_tex = get_texture(GL_R16F, level_width, level_height);
-               GLuint I_t_tex = get_texture(GL_R16F, level_width, level_height);
-               GLuint base_flow_tex = get_texture(GL_RG16F, level_width, level_height);
+               GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height);
+               GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height);
+               GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height);
                {
                        ScopedTimer timer("Prewarping", &varref_timer);
                        prewarp.exec(tex0_view, tex1_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height);
                }
-               release_texture(dense_flow_tex);
+               pool.release_texture(dense_flow_tex);
                glDeleteTextures(1, &tex0_view);
                glDeleteTextures(1, &tex1_view);
 
@@ -1177,26 +1224,26 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                // textures overall, since sampling from the L1 cache is cheap.
                // (TODO: Verify that this is indeed faster than making separate
                // double-derivative textures.)
-               GLuint I_x_y_tex = get_texture(GL_RG16F, level_width, level_height);
-               GLuint beta_0_tex = get_texture(GL_R16F, level_width, level_height);
+               GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height);
+               GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height);
                {
                        ScopedTimer timer("First derivatives", &varref_timer);
                        derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height);
                }
-               release_texture(I_tex);
+               pool.release_texture(I_tex);
 
                // We need somewhere to store du and dv (the flow increment, relative
                // to the non-refined base flow u0 and v0). It starts at zero.
-               GLuint du_dv_tex = get_texture(GL_RG16F, level_width, level_height);
+               GLuint du_dv_tex = pool.get_texture(GL_RG16F, level_width, level_height);
                glClearTexImage(du_dv_tex, 0, GL_RG, GL_FLOAT, nullptr);
 
                // And for smoothness.
-               GLuint smoothness_x_tex = get_texture(GL_R16F, level_width, level_height);
-               GLuint smoothness_y_tex = get_texture(GL_R16F, level_width, level_height);
+               GLuint smoothness_x_tex = pool.get_texture(GL_R16F, level_width, level_height);
+               GLuint smoothness_y_tex = pool.get_texture(GL_R16F, level_width, level_height);
 
                // And finally for the equation set. See SetupEquations for
                // the storage format.
-               GLuint equation_tex = get_texture(GL_RGBA32UI, level_width, level_height);
+               GLuint equation_tex = pool.get_texture(GL_RGBA32UI, level_width, level_height);
 
                for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
                        // Calculate the smoothness terms between the neighboring pixels,
@@ -1220,12 +1267,12 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                        }
                }
 
-               release_texture(I_t_tex);
-               release_texture(I_x_y_tex);
-               release_texture(beta_0_tex);
-               release_texture(smoothness_x_tex);
-               release_texture(smoothness_y_tex);
-               release_texture(equation_tex);
+               pool.release_texture(I_t_tex);
+               pool.release_texture(I_x_y_tex);
+               pool.release_texture(beta_0_tex);
+               pool.release_texture(smoothness_x_tex);
+               pool.release_texture(smoothness_y_tex);
+               pool.release_texture(equation_tex);
 
                // Add the differential flow found by the variational refinement to the base flow,
                // giving the final flow estimate for this level.
@@ -1237,10 +1284,10 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
                        ScopedTimer timer("Add differential flow", &varref_timer);
                        add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height);
                }
-               release_texture(du_dv_tex);
+               pool.release_texture(du_dv_tex);
 
                if (prev_level_flow_tex != initial_flow_tex) {
-                       release_texture(prev_level_flow_tex);
+                       pool.release_texture(prev_level_flow_tex);
                }
                prev_level_flow_tex = base_flow_tex;
                prev_level_width = level_width;
@@ -1254,14 +1301,14 @@ GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
        if (finest_level == 0) {
                return prev_level_flow_tex;
        } else {
-               GLuint final_tex = get_texture(GL_RG16F, width, height);
+               GLuint final_tex = pool.get_texture(GL_RG16F, width, height);
                resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height);
-               release_texture(prev_level_flow_tex);
+               pool.release_texture(prev_level_flow_tex);
                return final_tex;
        }
 }
 
-GLuint DISComputeFlow::get_texture(GLenum format, GLuint width, GLuint height)
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height)
 {
        for (Texture &tex : textures) {
                if (!tex.in_use && tex.format == format &&
@@ -1282,7 +1329,7 @@ GLuint DISComputeFlow::get_texture(GLenum format, GLuint width, GLuint height)
        return tex.tex_num;
 }
 
-void DISComputeFlow::release_texture(GLuint tex_num)
+void TexturePool::release_texture(GLuint tex_num)
 {
        for (Texture &tex : textures) {
                if (tex.tex_num == tex_num) {
@@ -1423,10 +1470,10 @@ int main(int argc, char **argv)
        SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
        // SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
        SDL_Window *window = SDL_CreateWindow("OpenGL window",
-                       SDL_WINDOWPOS_UNDEFINED,
-                       SDL_WINDOWPOS_UNDEFINED,
-                       64, 64,
-                       SDL_WINDOW_OPENGL);
+               SDL_WINDOWPOS_UNDEFINED,
+               SDL_WINDOWPOS_UNDEFINED,
+               64, 64,
+               SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
        SDL_GLContext context = SDL_GL_CreateContext(window);
        assert(context != nullptr);
 
@@ -1465,8 +1512,24 @@ int main(int argc, char **argv)
        glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
 
+       int levels = find_num_levels(width1, height1);
+       GLuint tex0_gray, tex1_gray;
+       glCreateTextures(GL_TEXTURE_2D, 1, &tex0_gray);
+       glCreateTextures(GL_TEXTURE_2D, 1, &tex1_gray);
+       glTextureStorage2D(tex0_gray, levels, GL_R8, width1, height1);
+       glTextureStorage2D(tex1_gray, levels, GL_R8, width1, height1);
+
+       GrayscaleConversion gray;
+       gray.exec(tex0, tex0_gray, width1, height1);
+       glDeleteTextures(1, &tex0);
+       glGenerateTextureMipmap(tex0_gray);
+
+       gray.exec(tex1, tex1_gray, width1, height1);
+       glDeleteTextures(1, &tex1);
+       glGenerateTextureMipmap(tex1_gray);
+
        DISComputeFlow compute_flow(width1, height1);
-       GLuint final_tex = compute_flow.exec(tex0, tex1);
+       GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray);
 
        schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
        compute_flow.release_texture(final_tex);
@@ -1485,6 +1548,9 @@ int main(int argc, char **argv)
                                filename0, width, height, width1, height1);
                        exit(1);
                }
+               gray.exec(tex0, tex0_gray, width, height);
+               glGenerateTextureMipmap(tex0_gray);
+               glDeleteTextures(1, &tex0);
 
                GLuint tex1 = load_texture(filename1, &width, &height);
                if (width != width1 || height != height1) {
@@ -1492,11 +1558,17 @@ int main(int argc, char **argv)
                                filename1, width, height, width1, height1);
                        exit(1);
                }
+               gray.exec(tex1, tex1_gray, width, height);
+               glGenerateTextureMipmap(tex1_gray);
+               glDeleteTextures(1, &tex1);
+
+               GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray);
 
-               GLuint final_tex = compute_flow.exec(tex0, tex1);
                schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
                compute_flow.release_texture(final_tex);
        }
+       glDeleteTextures(1, &tex0_gray);
+       glDeleteTextures(1, &tex1_gray);
 
        while (!reads_in_progress.empty()) {
                finish_one_read(width1, height1);