stack<GLuint> spare_pbos;
deque<ReadInProgress> reads_in_progress;
+// Returns the number of mipmap levels in a full chain for a width x height
+// image: one for the base level, plus one for every halving needed until
+// the larger of the two dimensions has reached 1.
+int find_num_levels(int width, int height)
+{
+	// Both dimensions are halved in lockstep, so only the larger one
+	// decides when the chain ends.
+	int largest = (width > height) ? width : height;
+	int levels = 1;
+	while (largest > 1) {
+		largest >>= 1;
+		++levels;
+	}
+	return levels;
+}
+
string read_file(const string &filename)
{
FILE *fp = fopen(filename.c_str(), "r");
}
// For whatever reason, SDL doesn't support converting to YUV surfaces
- // nor grayscale, so we'll do it (slowly) ourselves.
- SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA8888, /*flags=*/0);
+ // nor grayscale, so we'll do it ourselves.
+ SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
if (rgb_surf == nullptr) {
fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
exit(1);
unsigned width = rgb_surf->w, height = rgb_surf->h;
const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
- unique_ptr<uint8_t[]> pix(new uint8_t[width * height]);
+ unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
// Extract the Y component, and convert to bottom-left origin.
for (unsigned y = 0; y < height; ++y) {
unsigned y2 = height - 1 - y;
- for (unsigned x = 0; x < width; ++x) {
- uint8_t r = sptr[(y2 * width + x) * 4 + 3];
- uint8_t g = sptr[(y2 * width + x) * 4 + 2];
- uint8_t b = sptr[(y2 * width + x) * 4 + 1];
-
- // Rec. 709.
- pix[y * width + x] = lrintf(r * 0.2126f + g * 0.7152f + b * 0.0722f);
- }
+ memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
}
SDL_FreeSurface(rgb_surf);
- int levels = 1;
- for (int w = width, h = height; w > 1 || h > 1; ) {
- w >>= 1;
- h >>= 1;
- ++levels;
- }
-
GLuint tex;
glCreateTextures(GL_TEXTURE_2D, 1, &tex);
- glTextureStorage2D(tex, levels, GL_R8, width, height);
- glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_BYTE, pix.get());
- glGenerateTextureMipmap(tex);
+ glTextureStorage2D(tex, 1, GL_RGBA8, width, height);
+ glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
*width_ret = width;
*height_ret = height;
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
}
+// Converts an RGB texture to grayscale, using Rec. 709 coefficients.
+// The constructor builds the shader program and vertex array once;
+// exec() then issues one draw call per conversion.
+class GrayscaleConversion {
+public:
+	GrayscaleConversion();
+
+	// Samples from <tex> and renders into <gray_tex>, with the viewport
+	// set to <width> x <height>.
+	void exec(GLint tex, GLint gray_tex, int width, int height);
+
+private:
+	// Used by exec() to select <gray_tex> as the render target.
+	PersistentFBOSet<1> fbos;
+
+	// The shader objects compiled from vs.vert and gray.frag,
+	// and the program linked from them.
+	GLuint gray_vs_obj, gray_fs_obj;
+	GLuint gray_program;
+
+	// Vertex array holding the setup for the "position" attribute.
+	GLuint gray_vao;
+
+	// Location of the "tex" uniform in gray_program.
+	GLuint uniform_tex;
+};
+
+// Compiles and links the grayscale shader program, then sets up the
+// uniform location and vertex array that exec() relies on.
+GrayscaleConversion::GrayscaleConversion()
+{
+	gray_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+	gray_fs_obj = compile_shader(read_file("gray.frag"), GL_FRAGMENT_SHADER);
+	gray_program = link_program(gray_vs_obj, gray_fs_obj);
+	uniform_tex = glGetUniformLocation(gray_program, "tex");
+
+	// The VAO describes the "position" attribute as pairs of floats
+	// with a stride of zero (i.e., tightly packed).
+	glCreateVertexArrays(1, &gray_vao);
+	glBindVertexArray(gray_vao);
+
+	const GLint position_loc = glGetAttribLocation(gray_program, "position");
+	glEnableVertexArrayAttrib(gray_vao, position_loc);
+	glVertexAttribPointer(position_loc, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Runs the grayscale shader once, sampling from <tex> and rendering
+// into <gray_tex>.
+void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height)
+{
+	// Input: <tex> goes on texture unit 0, with the nearest-neighbor sampler.
+	glUseProgram(gray_program);
+	bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);
+
+	// Output: <gray_tex>, with the viewport covering width x height.
+	glViewport(0, 0, width, height);
+	fbos.render_to(gray_tex);
+
+	// Remaining draw state. NOTE(review): the program was already selected
+	// above, so this second glUseProgram() is presumably redundant; it is
+	// kept since render_to() is not visible from here.
+	glUseProgram(gray_program);
+	glDisable(GL_BLEND);
+	glBindVertexArray(gray_vao);
+
+	// Four vertices as a triangle strip (presumably a quad covering the viewport).
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
// Compute gradients in every point, used for the motion search.
// The DIS paper doesn't actually mention how these are computed,
// but seemingly, a 3x3 Sobel operator is used here (at least in
GLuint sobel_program;
GLuint sobel_vao;
- GLuint uniform_tex, uniform_image_size;
+ GLuint uniform_tex;
};
Sobel::Sobel()
void Sobel::exec(GLint tex0_view, GLint grad0_tex, int level_width, int level_height)
{
glUseProgram(sobel_program);
- glBindTextureUnit(0, tex0_view);
- glBindSampler(0, nearest_sampler);
- glProgramUniform1i(sobel_program, uniform_tex, 0);
+ bind_sampler(sobel_program, uniform_tex, 0, tex0_view, nearest_sampler);
glViewport(0, 0, level_width, level_height);
fbos.render_to(grad0_tex);
GLuint motion_search_program;
GLuint motion_search_vao;
- GLuint uniform_image_size, uniform_inv_image_size, uniform_flow_size, uniform_inv_prev_level_size;
+ GLuint uniform_inv_image_size, uniform_inv_prev_level_size;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_grad0_tex, uniform_flow_tex;
};
glEnableVertexArrayAttrib(motion_search_vao, position_attrib);
glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
- uniform_image_size = glGetUniformLocation(motion_search_program, "image_size");
uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
- uniform_flow_size = glGetUniformLocation(motion_search_program, "flow_size");
uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
uniform_image0_tex = glGetUniformLocation(motion_search_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(motion_search_program, "image1_tex");
bind_sampler(motion_search_program, uniform_grad0_tex, 2, grad0_tex, zero_border_sampler);
bind_sampler(motion_search_program, uniform_flow_tex, 3, flow_tex, linear_sampler);
- glProgramUniform2f(motion_search_program, uniform_image_size, level_width, level_height);
glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
- glProgramUniform2f(motion_search_program, uniform_flow_size, width_patches, height_patches);
glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
glViewport(0, 0, width_patches, height_patches);
GLuint densify_program;
GLuint densify_vao;
- GLuint uniform_width_patches, uniform_patch_size, uniform_patch_spacing;
+ GLuint uniform_patch_size, uniform_patch_spacing;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
- GLuint uniform_flow_size;
};
Densify::Densify()
glEnableVertexArrayAttrib(densify_vao, position_attrib);
glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
- uniform_width_patches = glGetUniformLocation(densify_program, "width_patches");
uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
uniform_patch_spacing = glGetUniformLocation(densify_program, "patch_spacing");
uniform_image0_tex = glGetUniformLocation(densify_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(densify_program, "image1_tex");
uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
- uniform_flow_size = glGetUniformLocation(densify_program, "flow_size");
}
void Densify::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches)
bind_sampler(densify_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
bind_sampler(densify_program, uniform_flow_tex, 2, flow_tex, nearest_sampler);
- glProgramUniform1i(densify_program, uniform_width_patches, width_patches);
glProgramUniform2f(densify_program, uniform_patch_size,
float(patch_size_pixels) / level_width,
float(patch_size_pixels) / level_height);
- glProgramUniform2f(densify_program, uniform_flow_size,
- width_patches,
- height_patches);
float patch_spacing_x = float(level_width - patch_size_pixels) / (width_patches - 1);
float patch_spacing_y = float(level_height - patch_size_pixels) / (height_patches - 1);
GLuint prewarp_vao;
GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex;
- GLuint uniform_image_size;
};
Prewarp::Prewarp()
uniform_image0_tex = glGetUniformLocation(prewarp_program, "image0_tex");
uniform_image1_tex = glGetUniformLocation(prewarp_program, "image1_tex");
uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
-
- uniform_image_size = glGetUniformLocation(prewarp_program, "image_size");
}
void Prewarp::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height)
bind_sampler(prewarp_program, uniform_image1_tex, 1, tex1_view, linear_sampler);
bind_sampler(prewarp_program, uniform_flow_tex, 2, flow_tex, nearest_sampler);
- glProgramUniform2f(prewarp_program, uniform_image_size, level_width, level_height);
-
glViewport(0, 0, level_width, level_height);
glDisable(GL_BLEND);
glBindVertexArray(prewarp_vao);
GLuint uniform_diff_flow_tex;
GLuint uniform_equation_tex;
GLuint uniform_smoothness_x_tex, uniform_smoothness_y_tex;
+ GLuint uniform_phase;
};
SOR::SOR()
{
- sor_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
+ sor_vs_obj = compile_shader(read_file("sor.vert"), GL_VERTEX_SHADER);
sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER);
sor_program = link_program(sor_vs_obj, sor_fs_obj);
uniform_equation_tex = glGetUniformLocation(sor_program, "equation_tex");
uniform_smoothness_x_tex = glGetUniformLocation(sor_program, "smoothness_x_tex");
uniform_smoothness_y_tex = glGetUniformLocation(sor_program, "smoothness_y_tex");
+ uniform_phase = glGetUniformLocation(sor_program, "phase");
}
void SOR::exec(GLuint diff_flow_tex, GLuint equation_tex, GLuint smoothness_x_tex, GLuint smoothness_y_tex, int level_width, int level_height, int num_iterations)
bind_sampler(sor_program, uniform_smoothness_y_tex, 2, smoothness_y_tex, zero_border_sampler);
bind_sampler(sor_program, uniform_equation_tex, 3, equation_tex, nearest_sampler);
+ // NOTE: We bind to the texture we are rendering from, but we never write any value
+ // that we read in the same shader pass (we call discard for red values when we compute
+ // black, and vice versa), and we have barriers between the passes, so we're fine
+ // as per the spec.
glViewport(0, 0, level_width, level_height);
glDisable(GL_BLEND);
glBindVertexArray(sor_vao);
- fbos.render_to(diff_flow_tex); // NOTE: Bind to same as we render from!
+ fbos.render_to(diff_flow_tex);
for (int i = 0; i < num_iterations; ++i) {
+ glProgramUniform1i(sor_program, uniform_phase, 0);
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+ glTextureBarrier();
+ glProgramUniform1i(sor_program, uniform_phase, 1);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (i != num_iterations - 1) {
glTextureBarrier();
bool ended = false;
};
+// Hands out textures by format and size, and takes them back when
+// the caller is done with them.
+class TexturePool {
+public:
+	// Returns the name of a texture with the given format and dimensions.
+	// The caller hands it back with release_texture() after use.
+	GLuint get_texture(GLenum format, GLuint width, GLuint height);
+
+	// Hands back a texture previously returned by get_texture()
+	// (presumably making it available for reuse).
+	void release_texture(GLuint tex_num);
+
+private:
+	// Bookkeeping for a single texture in the pool.
+	struct Texture {
+		GLuint tex_num;
+		GLenum format;
+		GLuint width;
+		GLuint height;
+		bool in_use = false;
+	};
+	vector<Texture> textures;
+};
+
class DISComputeFlow {
public:
DISComputeFlow(int width, int height);
// Returns a texture that must be released with release_texture()
// after use.
GLuint exec(GLuint tex0, GLuint tex1);
- void release_texture(GLuint tex);
+
+ void release_texture(GLuint tex) {
+ pool.release_texture(tex);
+ }
private:
int width, height;
GLuint initial_flow_tex;
+ TexturePool pool;
// The various passes.
Sobel sobel;
SOR sor;
AddBaseFlow add_base_flow;
ResizeFlow resize_flow;
-
- struct Texture {
- GLuint tex_num;
- GLenum format;
- GLuint width, height;
- bool in_use = false;
- };
- vector<Texture> textures;
-
- GLuint get_texture(GLenum format, GLuint width, GLuint height);
};
DISComputeFlow::DISComputeFlow(int width, int height)
GLuint DISComputeFlow::exec(GLuint tex0, GLuint tex1)
{
- for (const Texture &tex : textures) {
- assert(!tex.in_use);
- }
-
int prev_level_width = 1, prev_level_height = 1;
GLuint prev_level_flow_tex = initial_flow_tex;
// Make sure we always read from the correct level; the chosen
// mipmapping could otherwise be rather unpredictable, especially
// during motion search.
- // TODO: create these beforehand, and stop leaking them.
GLuint tex0_view, tex1_view;
glGenTextures(1, &tex0_view);
glTextureView(tex0_view, GL_TEXTURE_2D, tex0, GL_R8, level, 1, 0, 1);
// Create a new texture; we could be fancy and render to a multi-level
// texture, but meh.
- GLuint grad0_tex = get_texture(GL_RG16F, level_width, level_height);
+ GLuint grad0_tex = pool.get_texture(GL_RG16F, level_width, level_height);
// Find the derivative.
{
// level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
// Create an output flow texture.
- GLuint flow_out_tex = get_texture(GL_RGB16F, width_patches, height_patches);
+ GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches);
// And draw.
{
ScopedTimer timer("Motion search", &level_timer);
motion_search.exec(tex0_view, tex1_view, grad0_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches);
}
- release_texture(grad0_tex);
+ pool.release_texture(grad0_tex);
// Densification.
// Set up an output texture (initially zero).
- GLuint dense_flow_tex = get_texture(GL_RGB16F, level_width, level_height);
+ GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height);
glClearTexImage(dense_flow_tex, 0, GL_RGB, GL_FLOAT, nullptr);
// And draw.
ScopedTimer timer("Densification", &level_timer);
densify.exec(tex0_view, tex1_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches);
}
- release_texture(flow_out_tex);
+ pool.release_texture(flow_out_tex);
// Everything below here in the loop belongs to variational refinement.
ScopedTimer varref_timer("Variational refinement", &level_timer);
// in pixels, not 0..1 normalized OpenGL texture coordinates.
// This is because variational refinement depends so heavily on derivatives,
// which are measured in intensity levels per pixel.
- GLuint I_tex = get_texture(GL_R16F, level_width, level_height);
- GLuint I_t_tex = get_texture(GL_R16F, level_width, level_height);
- GLuint base_flow_tex = get_texture(GL_RG16F, level_width, level_height);
+ GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height);
+ GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height);
+ GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height);
{
ScopedTimer timer("Prewarping", &varref_timer);
prewarp.exec(tex0_view, tex1_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height);
}
- release_texture(dense_flow_tex);
+ pool.release_texture(dense_flow_tex);
+ glDeleteTextures(1, &tex0_view);
+ glDeleteTextures(1, &tex1_view);
// Calculate I_x and I_y. We're only calculating first derivatives;
// the others will be taken on-the-fly in order to sample from fewer
// textures overall, since sampling from the L1 cache is cheap.
// (TODO: Verify that this is indeed faster than making separate
// double-derivative textures.)
- GLuint I_x_y_tex = get_texture(GL_RG16F, level_width, level_height);
- GLuint beta_0_tex = get_texture(GL_R16F, level_width, level_height);
+ GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height);
+ GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height);
{
ScopedTimer timer("First derivatives", &varref_timer);
derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height);
}
- release_texture(I_tex);
+ pool.release_texture(I_tex);
// We need somewhere to store du and dv (the flow increment, relative
// to the non-refined base flow u0 and v0). It starts at zero.
- GLuint du_dv_tex = get_texture(GL_RG16F, level_width, level_height);
+ GLuint du_dv_tex = pool.get_texture(GL_RG16F, level_width, level_height);
glClearTexImage(du_dv_tex, 0, GL_RG, GL_FLOAT, nullptr);
// And for smoothness.
- GLuint smoothness_x_tex = get_texture(GL_R16F, level_width, level_height);
- GLuint smoothness_y_tex = get_texture(GL_R16F, level_width, level_height);
+ GLuint smoothness_x_tex = pool.get_texture(GL_R16F, level_width, level_height);
+ GLuint smoothness_y_tex = pool.get_texture(GL_R16F, level_width, level_height);
// And finally for the equation set. See SetupEquations for
// the storage format.
- GLuint equation_tex = get_texture(GL_RGBA32UI, level_width, level_height);
+ GLuint equation_tex = pool.get_texture(GL_RGBA32UI, level_width, level_height);
for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
// Calculate the smoothness terms between the neighboring pixels,
}
}
- release_texture(I_t_tex);
- release_texture(I_x_y_tex);
- release_texture(beta_0_tex);
- release_texture(smoothness_x_tex);
- release_texture(smoothness_y_tex);
- release_texture(equation_tex);
+ pool.release_texture(I_t_tex);
+ pool.release_texture(I_x_y_tex);
+ pool.release_texture(beta_0_tex);
+ pool.release_texture(smoothness_x_tex);
+ pool.release_texture(smoothness_y_tex);
+ pool.release_texture(equation_tex);
// Add the differential flow found by the variational refinement to the base flow,
// giving the final flow estimate for this level.
ScopedTimer timer("Add differential flow", &varref_timer);
add_base_flow.exec(base_flow_tex, du_dv_tex, level_width, level_height);
}
- release_texture(du_dv_tex);
+ pool.release_texture(du_dv_tex);
if (prev_level_flow_tex != initial_flow_tex) {
- release_texture(prev_level_flow_tex);
+ pool.release_texture(prev_level_flow_tex);
}
prev_level_flow_tex = base_flow_tex;
prev_level_width = level_width;
if (finest_level == 0) {
return prev_level_flow_tex;
} else {
- GLuint final_tex = get_texture(GL_RG16F, width, height);
+ GLuint final_tex = pool.get_texture(GL_RG16F, width, height);
resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height);
- release_texture(prev_level_flow_tex);
+ pool.release_texture(prev_level_flow_tex);
return final_tex;
}
}
-GLuint DISComputeFlow::get_texture(GLenum format, GLuint width, GLuint height)
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height)
{
for (Texture &tex : textures) {
if (!tex.in_use && tex.format == format &&
return tex.tex_num;
}
-void DISComputeFlow::release_texture(GLuint tex_num)
+void TexturePool::release_texture(GLuint tex_num)
{
for (Texture &tex : textures) {
if (tex.tex_num == tex_num) {
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
// SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
SDL_Window *window = SDL_CreateWindow("OpenGL window",
- SDL_WINDOWPOS_UNDEFINED,
- SDL_WINDOWPOS_UNDEFINED,
- 64, 64,
- SDL_WINDOW_OPENGL);
+ SDL_WINDOWPOS_UNDEFINED,
+ SDL_WINDOWPOS_UNDEFINED,
+ 64, 64,
+ SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
SDL_GLContext context = SDL_GL_CreateContext(window);
assert(context != nullptr);
glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+ int levels = find_num_levels(width1, height1);
+ GLuint tex0_gray, tex1_gray;
+ glCreateTextures(GL_TEXTURE_2D, 1, &tex0_gray);
+ glCreateTextures(GL_TEXTURE_2D, 1, &tex1_gray);
+ glTextureStorage2D(tex0_gray, levels, GL_R8, width1, height1);
+ glTextureStorage2D(tex1_gray, levels, GL_R8, width1, height1);
+
+ GrayscaleConversion gray;
+ gray.exec(tex0, tex0_gray, width1, height1);
+ glDeleteTextures(1, &tex0);
+ glGenerateTextureMipmap(tex0_gray);
+
+ gray.exec(tex1, tex1_gray, width1, height1);
+ glDeleteTextures(1, &tex1);
+ glGenerateTextureMipmap(tex1_gray);
+
DISComputeFlow compute_flow(width1, height1);
- GLuint final_tex = compute_flow.exec(tex0, tex1);
+ GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray);
schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
compute_flow.release_texture(final_tex);
filename0, width, height, width1, height1);
exit(1);
}
+ gray.exec(tex0, tex0_gray, width, height);
+ glGenerateTextureMipmap(tex0_gray);
+ glDeleteTextures(1, &tex0);
GLuint tex1 = load_texture(filename1, &width, &height);
if (width != width1 || height != height1) {
filename1, width, height, width1, height1);
exit(1);
}
+ gray.exec(tex1, tex1_gray, width, height);
+ glGenerateTextureMipmap(tex1_gray);
+ glDeleteTextures(1, &tex1);
+
+ GLuint final_tex = compute_flow.exec(tex0_gray, tex1_gray);
- GLuint final_tex = compute_flow.exec(tex0, tex1);
schedule_read(final_tex, width1, height1, filename0, filename1, flow_filename, "");
compute_flow.release_texture(final_tex);
}
+ glDeleteTextures(1, &tex0_gray);
+ glDeleteTextures(1, &tex1_gray);
while (!reads_in_progress.empty()) {
finish_one_read(width1, height1);