SDL_Window *window;
-// Operating point 3 (10 Hz on CPU, excluding preprocessing).
-constexpr float patch_overlap_ratio = 0.75f;
-constexpr unsigned coarsest_level = 5;
-constexpr unsigned finest_level = 1;
-constexpr unsigned patch_size_pixels = 12;
-
// Weighting constants for the different parts of the variational refinement.
// These don't correspond 1:1 to the values given in the DIS paper,
// since we have different normalizations and ranges in some cases.
// although the error (EPE) seems to be fairly insensitive to the precise values.
// Only the relative values matter, so we fix alpha (the smoothness constant)
// at unity and tweak the others.
-float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
+static float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
bool enable_timing = true;
bool detailed_timing = false;
glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
}
-Densify::Densify()
+Densify::Densify(const OperatingPoint &op)
+ : op(op)
{
densify_vs_obj = compile_shader(read_file("densify.vert"), GL_VERTEX_SHADER);
densify_fs_obj = compile_shader(read_file("densify.frag"), GL_FRAGMENT_SHADER);
bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
glProgramUniform2f(densify_program, uniform_patch_size,
- float(patch_size_pixels) / level_width,
- float(patch_size_pixels) / level_height);
+ float(op.patch_size_pixels) / level_width,
+ float(op.patch_size_pixels) / level_height);
glViewport(0, 0, level_width, level_height);
glEnable(GL_BLEND);
glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
}
-DISComputeFlow::DISComputeFlow(int width, int height)
- : width(width), height(height)
+DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
+ : width(width), height(height), op(op), densify(op)
{
// Make some samplers.
glCreateSamplers(1, &nearest_sampler);
glBindVertexArray(vao);
ScopedTimer total_timer("Compute flow", &timers);
- for (int level = coarsest_level; level >= int(finest_level); --level) {
+ for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
char timer_name[256];
snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
ScopedTimer level_timer(timer_name, &total_timer);
int level_width = width >> level;
int level_height = height >> level;
- float patch_spacing_pixels = patch_size_pixels * (1.0f - patch_overlap_ratio);
+ float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
// Make sure we have patches at least every Nth pixel, e.g. for width=9
// and patch_spacing=3 (the default), we put out patch centers in
//
// Disabling this doesn't save any time (although we could easily make it so that
// it is more efficient), but it helps debug the motion search.
- if (enable_variational_refinement) {
+ if (op.variational_refinement) {
ScopedTimer timer("Add differential flow", &varref_timer);
add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
}
}
// Scale up the flow to the final size (if needed).
- if (finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
+ if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
return prev_level_flow_tex;
} else {
GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
}
}
-Splat::Splat()
+Splat::Splat(const OperatingPoint &op)
+ : op(op)
{
splat_vs_obj = compile_shader(read_file("splat.vert"), GL_VERTEX_SHADER);
splat_fs_obj = compile_shader(read_file("splat.frag"), GL_FRAGMENT_SHADER);
bind_sampler(splat_program, uniform_image_tex, 0, image_tex, linear_sampler);
bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
- // FIXME: This is set to 1.0 right now so not to trigger Haswell's “PMA stall”.
- // Move to 2.0 later, or even 4.0.
- // (Since we have hole filling, it's not critical, but larger values seem to do
- // better than hole filling for large motion, blurs etc.)
- float splat_size = 1.0f; // 4x4 splat means 16x overdraw, 2x2 splat means 4x overdraw.
- glProgramUniform2f(splat_program, uniform_splat_size, splat_size / width, splat_size / height);
+ glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
glProgramUniform1f(splat_program, uniform_alpha, alpha);
glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
-Interpolate::Interpolate(int width, int height, int flow_level)
- : width(width), height(height), flow_level(flow_level) {
+Interpolate::Interpolate(int width, int height, const OperatingPoint &op)
+ : width(width), height(height), flow_level(op.finest_level), op(op), splat(op) {
// Set up the vertex data that will be shared between all passes.
float vertices[] = {
0.0f, 1.0f,
gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
glGenerateTextureMipmap(tex_gray);
- DISComputeFlow compute_flow(width1, height1);
+ OperatingPoint op = operating_point3;
+ if (!enable_variational_refinement) {
+ op.variational_refinement = false;
+ }
+ DISComputeFlow compute_flow(width1, height1, op);
if (enable_warmup) {
in_warmup = true;
spare_pbos.push(pbos[i]);
}
- DISComputeFlow compute_flow(width1, height1);
+ OperatingPoint op = operating_point3;
+ if (!enable_variational_refinement) {
+ op.variational_refinement = false;
+ }
+ DISComputeFlow compute_flow(width1, height1, op);
GrayscaleConversion gray;
- Interpolate interpolate(width1, height1, finest_level);
+ Interpolate interpolate(width1, height1, op);
GLuint tex_gray;
glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
#define _FLOW_H 1
// Code for computing optical flow between two images, and using it to interpolate
-// in-between frames. The main user interface is the Interpolate class.
+// in-between frames. The main user interfaces are the DISComputeFlow and Interpolate
+// classes (GrayscaleConversion can also be useful).
#include <stdint.h>
#include <epoxy/gl.h>
class ScopedTimer;
+// One set of tuning parameters (an operating point). Predefined operating points from the paper follow below.
+struct OperatingPoint {
+ unsigned coarsest_level; // Level the per-level loop starts at (level N works on width >> N by height >> N). TODO: Adjust dynamically based on the resolution?
+ unsigned finest_level; // Level the per-level loop ends at (inclusive); at 0 the flow is returned without a final upscale.
+ unsigned search_iterations; // TODO: Not implemented yet! Halved from the paper.
+ unsigned patch_size_pixels; // TODO: Not implemented in the shader yet!
+ float patch_overlap_ratio; // Patch spacing is patch_size_pixels * (1 - patch_overlap_ratio).
+ bool variational_refinement; // TODO: Actually disabling this is not implemented yet!
+
+ // Splat size in pixels. Not part of the original paper; used for interpolation.
+ // NOTE: Values much larger than 1.0 seem to trigger Haswell's “PMA stall”;
+ // the problem is not present on Broadwell and higher (there's a mitigation
+ // in the hardware, but Mesa doesn't enable it at the time of writing).
+ // Since we have hole filling, the holes from 1.0 are not critical,
+ // but larger values seem to do better than hole filling for large
+ // motion, blurs etc. since we have more candidates.
+ float splat_size;
+};
+
+// Operating point 1 (600 Hz on CPU, excluding preprocessing). Initializers are positional, in OperatingPoint member order.
+static constexpr OperatingPoint operating_point1 = {
+ 5, // Coarsest level.
+ 3, // Finest level.
+ 8, // Search iterations (not consumed yet; see the TODO on OperatingPoint::search_iterations).
+ 8, // Patch size (pixels).
+ 0.30f, // Overlap ratio.
+ false, // Variational refinement.
+ 1.0f // Splat size (pixels).
+};
+
+// Operating point 2 (300 Hz on CPU, excluding preprocessing). Initializers are positional, in OperatingPoint member order.
+static constexpr OperatingPoint operating_point2 = {
+ 5, // Coarsest level.
+ 3, // Finest level.
+ 6, // Search iterations (not consumed yet; see the TODO on OperatingPoint::search_iterations).
+ 8, // Patch size (pixels).
+ 0.40f, // Overlap ratio.
+ true, // Variational refinement.
+ 1.0f // Splat size (pixels).
+};
+
+// Operating point 3 (10 Hz on CPU, excluding preprocessing). Initializers are positional, in OperatingPoint member order.
+// This is the only one that has been thoroughly tested.
+static constexpr OperatingPoint operating_point3 = {
+ 5, // Coarsest level.
+ 1, // Finest level.
+ 8, // Search iterations (not consumed yet; see the TODO on OperatingPoint::search_iterations).
+ 12, // Patch size (pixels).
+ 0.75f, // Overlap ratio.
+ true, // Variational refinement.
+ 4.0f // Splat size (pixels).
+};
+
+// Operating point 4 (0.5 Hz on CPU, excluding preprocessing). Initializers are positional, in OperatingPoint member order.
+static constexpr OperatingPoint operating_point4 = {
+ 5, // Coarsest level.
+ 0, // Finest level.
+ 128, // Search iterations (not consumed yet; see the TODO on OperatingPoint::search_iterations).
+ 12, // Patch size (pixels).
+ 0.75f, // Overlap ratio.
+ true, // Variational refinement.
+ 8.0f // Splat size (pixels).
+};
+
// A class that caches FBOs that render to a given set of textures.
// It never frees anything, so it is only suitable for rendering to
// the same (small) set of textures over and over again.
// weight in the B channel. Dividing R and G by B gives the normalized values.
class Densify {
public:
- Densify();
+ Densify(const OperatingPoint &op);
void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers);
private:
+ OperatingPoint op;
PersistentFBOSet<1> fbos;
GLuint densify_vs_obj;
class DISComputeFlow {
public:
- DISComputeFlow(int width, int height);
+ DISComputeFlow(int width, int height, const OperatingPoint &op);
enum FlowDirection {
FORWARD,
GLuint initial_flow_tex;
GLuint vertex_vbo, vao;
TexturePool pool;
+ const OperatingPoint op;
// The various passes.
Sobel sobel;
// radius fills most of the holes.
class Splat {
public:
- Splat();
+ Splat(const OperatingPoint &op);
// alpha is the time of the interpolated frame (0..1).
void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
private:
+ const OperatingPoint op;
PersistentFBOSetWithDepth<1> fbos;
GLuint splat_vs_obj;
class Interpolate {
public:
- Interpolate(int width, int height, int flow_level);
+ Interpolate(int width, int height, const OperatingPoint &op);
// Returns a texture that must be released with release_texture()
// after use. image_tex must be a two-layer RGBA8 texture with mipmaps
int width, height, flow_level;
GLuint vertex_vbo, vao;
TexturePool pool;
+ const OperatingPoint op;
Splat splat;
HoleFill hole_fill;