From c7f5b3e00a0b6581b42deb5b055420fbf2a629e5 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 12 Jul 2018 00:44:17 +0200 Subject: [PATCH] Start working on variational refinement. --- flow.cpp | 73 +++++- prewarp.frag | 20 ++ variational_refinement.txt | 520 +++++++++++++++++++++++++++++++++++++ 3 files changed, 612 insertions(+), 1 deletion(-) create mode 100644 prewarp.frag create mode 100644 variational_refinement.txt diff --git a/flow.cpp b/flow.cpp index b46e0a1..87cc06f 100644 --- a/flow.cpp +++ b/flow.cpp @@ -404,6 +404,67 @@ void Densify::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint d glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches); } +// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of +// I_0 and I_w. The prewarping is what enables us to solve the variational +// flow for du,dv instead of u,v. +// +// See variational_refinement.txt for more information. +class Prewarp { +public: + Prewarp(); + void exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, int level_width, int level_height); + +private: + GLuint prewarp_vs_obj; + GLuint prewarp_fs_obj; + GLuint prewarp_program; + GLuint prewarp_vao; + + GLuint uniform_image0_tex, uniform_image1_tex, uniform_flow_tex; +}; + +Prewarp::Prewarp() +{ + prewarp_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); + prewarp_fs_obj = compile_shader(read_file("prewarp.frag"), GL_FRAGMENT_SHADER); + prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj); + + // Set up the VAO containing all the required position/texcoord data. 
+ glCreateVertexArrays(1, &prewarp_vao); + glBindVertexArray(prewarp_vao); + glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo); + + GLint position_attrib = glGetAttribLocation(prewarp_program, "position"); + glEnableVertexArrayAttrib(prewarp_vao, position_attrib); + glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); + + uniform_image0_tex = glGetUniformLocation(prewarp_program, "image0_tex"); + uniform_image1_tex = glGetUniformLocation(prewarp_program, "image1_tex"); + uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex"); +} + +void Prewarp::exec(GLuint tex0_view, GLuint tex1_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, int level_width, int level_height) +{ + glUseProgram(prewarp_program); + + bind_sampler(prewarp_program, uniform_image0_tex, 0, tex0_view, nearest_sampler); + bind_sampler(prewarp_program, uniform_image1_tex, 1, tex1_view, linear_sampler); + bind_sampler(prewarp_program, uniform_flow_tex, 2, flow_tex, nearest_sampler); + + GLuint prewarp_fbo; // TODO: cleanup + glCreateFramebuffers(1, &prewarp_fbo); + GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; + glNamedFramebufferDrawBuffers(prewarp_fbo, 2, bufs); + glNamedFramebufferTexture(prewarp_fbo, GL_COLOR_ATTACHMENT0, I_tex, 0); + glNamedFramebufferTexture(prewarp_fbo, GL_COLOR_ATTACHMENT1, I_t_tex, 0); + + glViewport(0, 0, level_width, level_height); + glDisable(GL_BLEND); + glBindVertexArray(prewarp_vao); + glBindFramebuffer(GL_FRAMEBUFFER, prewarp_fbo); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); +} + int main(void) { if (SDL_Init(SDL_INIT_EVERYTHING) == -1) { @@ -464,6 +525,7 @@ int main(void) Sobel sobel; MotionSearch motion_search; Densify densify; + Prewarp prewarp; GLuint query; glGenQueries(1, &query); @@ -516,7 +578,16 @@ int main(void) // And draw. densify.exec(tex0_view, tex1_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches); - // TODO: Variational refinement. 
+ // Everything below here in the loop belongs to variational refinement. + // It is not done yet. + + // Prewarping; create I and I_t. + GLuint I_tex, I_t_tex; + glCreateTextures(GL_TEXTURE_2D, 1, &I_tex); + glCreateTextures(GL_TEXTURE_2D, 1, &I_t_tex); + glTextureStorage2D(I_tex, 1, GL_R16F, level_width, level_height); + glTextureStorage2D(I_t_tex, 1, GL_R16F, level_width, level_height); + prewarp.exec(tex0_view, tex1_view, dense_flow_tex, I_tex, I_t_tex, level_width, level_height); prev_level_flow_tex = dense_flow_tex; } diff --git a/prewarp.frag b/prewarp.frag new file mode 100644 index 0000000..1bf99f7 --- /dev/null +++ b/prewarp.frag @@ -0,0 +1,20 @@ +#version 450 core + +// Warps I_1 according to the flow, then computes the mean and difference to I_0. + +in vec2 tc; +out float I, I_t; + +uniform sampler2D image0_tex, image1_tex, flow_tex; + +void main() +{ + vec3 flow = texture(flow_tex, tc).xyz; + flow.xy /= flow.z; // Normalize the sum coming out of the densification. + + float I_0 = texture(image0_tex, tc).x; + float I_w = texture(image1_tex, tc + flow.xy).x; // NOTE: This is effectively a reverse warp since texture() is a gather operation and flow is conceptually scatter. + + I = 0.5f * (I_0 + I_w); + I_t = I_w - I_0; +} diff --git a/variational_refinement.txt b/variational_refinement.txt new file mode 100644 index 0000000..58d0663 --- /dev/null +++ b/variational_refinement.txt @@ -0,0 +1,520 @@ +Variational refinement -- an introduction and derivation + +The variational refinement is probably the most difficult part of the +algorithm to understand, in part because the description in most papers +are very heavy on notation and rather light on exposition. I've tried +to give a somewhat friendlier introduction to this specific algorithm +below. + +The general idea is fairly simple; we try to optimize the flow field +as a whole, by minimizing some mathematical notion of badness expressed +as an energy function. 
The one used in the dense inverse search paper [Kroeger05; see references below] has this form:
Unfortunately, that paper in turn refers to +[Brox11], which appears no more useful in clearing up the notation +to me. + +However, digging down in the references, finally one finds [Zimmer11], +which is where the tensor notation appears to come from. This allows +us to look at the first term in the energy, E_I, which comes from the +intensity constant assumption. The basic idea is optical flow nearly +by definition should preserve intensity after the warp: + + I_0(x + u) = I_1(x) + +where I_0 is the first picture, I_1 is the second, x is any 2D +coordinate and u is the flow at x (which we are optimizing over). +In general, we'll be optimizing over the entire field of u +(potentially hundreds of thousands of values), but we'll be looking +mostly at individual points, so we'll skip the coordinates when we +can (e.g. we write u instead of or u(x, y)). u is of course the 2D +flow, although often, we'll write its components separately as u and v +instead of as a vector u. + +Before we go further, we need to add some more notation: + + * I_x is the partial derivative of I with respect to x (at some + point), and similarly for I_y. These do not depend on u, + so they can be precalculated before the optimization. + * I_xx is the double partial derivative of I, and similar for + I_yy and I_xy (the latter is the same as I_yx). + * I_t is the temporal derivative of I, ie. in practice just + I_t(x) = I_1(x) - I_0(x). + +Returning now to our original assertion: + + I_0(x + u) = I_1(x) + +Classically in optical flow, one assumes that the flow is smooth +and linear around the point x, which allows one to approximate this +equation by + + I_x u + I_y v + I_t = 0 + +This is usually simply called “the optical flow constraint”, +and gives rise to a very natural part of the energy: + + E_I = I_x u + I_y v + I_t + +Remember that we send E_I through the function Ψ(a²) = sqrt(a² + ε²), +so clearly Ψ(E_I) will be minimized if indeed E_I is zero. 
+ +At this point, many papers start talking about Euler-Lagrange +multivariate equations, which is a fairly daunting concept +(at least the Wikipedia page is suitable for scaring small children). +However, for the first two terms, we don't need its general form, +and it reduces to something much simpler; just differentiate the energy +by u and equate the result to zero (finding some minimum; it can't be +a maximum, since *wave hands intensely*). Then differentiate the energy +by v and set that to zero, too; now you have two equations in two +unknowns (or, since we're optimizing over a field, maybe 500k +equations in 500k unknowns -- although the equation set will be +very sparse), which is hopefully solvable using linear methods. +We'll look at what this gives for E_I in a moment, then try to apply +the same notions to E_G and E_S later. + +First we modify E_I a bit by adding some normalization: + + E_I = β_0 (I_x u + I_y v + I_t) + +where β_0 = 1/(abs(∇I)² + 0.01). Note that β_0 depends on I only, +so for the purposes of optimizing u, it's a constant and can be +precomputed across I. (β_0 will, of course, depend on x, but so +do all the other terms in the equation.) + +Now we give it to Maple, differentiating first by u and then by v: + +> M := (u,v) -> B_0 * (I_x * u + I_y * v + I_t); + M := (u, v) -> B_0 (I_x u + I_y v + I_t) + +> diff(sqrt(M(u,v)^2 + e), u); + 2 + B_0 (I_x u + I_y v + I_t) I_x + ------------------------------------ + 2 2 1/2 + (B_0 (I_x u + I_y v + I_t) + e) + +> diff(sqrt(M(u,v)^2 + e), v); + 2 + B_0 (I_x u + I_y v + I_t) I_y + ------------------------------------ + 2 2 1/2 + (B_0 (I_x u + I_y v + I_t) + e) + + +So these are the two expressions to be set to zero (for each +point). 
We'll notice immediately that this isn't very linear +in u and v, so here's where the “fixed point iterations” come in; +we simply assume that our previous values for u and v are +approximately good enough for the denominator, and optimize +them in the numerator only. Then we get new values that are +hopefully a bit closer, which we can then use for the +denominator, and so on. (This is seemingly an old technique; +[Brox05] cites [Ciarlet78]. It is justifiable in the sense +that the only thing really held constant is the derivative +of the penalizer.) In other words, if we define the constant + + k1 = β_0² / sqrt(β_0² (I_x u' + I_y v' + I_t)² + ε²) + +(where u' and v' are the guesses for u and v from the previous +iteration) + +we have the much more manageable + + k1 I_x² u + k1 I_x I_y v = - k1 I_t + k1 I_x I_y u + k1 I_y² v = - k1 I_t + +ie., two linear equations in u and v. Now, you will notice two +immediate problems by this equation set: + + * The factor k1 is completely useless, since it's just multiplied + in everywhere. + * The set of equations is colinear (the determinant of the matrix + is zero), and thus there is an infinite number of possible + solutions—this is known as the so-called “aperture problem”. + It shouldn't be surprising, though, as we cannot expect that + starting with a single constraint should allow us to solve + for two unknowns. + +However, both problems will go away as soon as we start adding +more terms, so let's look at the gradient constancy term E_G next. +It is fairly similar to the brightness constancy term, except it +uses the (spatial) gradient instead of intensity: + + ∇I_0(x + u) = ∇I_1(x) + +or equivalently (by definition): + + (∂I/∂x)_0(x + u) = (∂I/∂x)_1(x) + (∂I/∂y)_0(x + u) = (∂I/∂y)_1(x) + +The idea is that this is more robust to changes in lighting. +It doesn't replace the intensity term, but augments it; the weighting +constants σ and γ control their relative importance. 
Also note that +this actually gives us two independent equations, unlike the brightness +constancy term. + +However, it is not obvious at all how to discretize this. In particular, +most papers, including [Brox04], appear to want _not_ to make any linear +assumptions of the flow in this case, and end up with tons of terms. +(The DIS and DeepFlow papers do, again, use some tensor notation that +I do not understand, but I'm not convinced it actually contains any +of the discretization.) + +Yet more paper searching eventually turns up [Fahad07], which simply +states that the discretized versions of these equations are: + + I_xx u + I_xy v + I_xt = 0 + I_yx u + I_yy v + I_yt = 0. + +which seems to match well what the DIS code uses. Note that even though +this is an equation set equal to zero, we can't just solve for them; +we need to make (penalized, normalized) energy terms and add them to +the other terms. This gives + + E_G = β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt) + +with normalization terms + + β_x = 1 / (abs(∇(I_x))² + 0.01) (∇(I_x) is the gradient of ∂I/∂x) + β_y = 1 / (abs(∇(I_y))² + 0.01) + +(The DIS paper writes ∇I_dx and ∇I_dy instead of ∇I_x and ∇I_y, but I believe +that's a typo; the DeepFlow paper says ∇I_x and ∇I_y.) + +The papers both write that Ψ(E_G) is used, which would mean that the penalized +term is + + E_G = sqrt((β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt))² + ε²) + +but that isn't what the code actually does. Instead, it seems that the two +terms are squared independently: + + E_G = sqrt((β_x (I_xx u + I_xy v + I_xt))² + (β_y (I_yx u + I_yy v + I_yt))² + ε²) + +Both are solvable just fine, and it probably does not matter all that much +which we use in practice (although [Zimmer11] suggests that if we are using +multichannel images, we should penalize the three channels separately), +but we follow what the code actually does here. 
+ +We can differentiate them and equate them to zero as before: + +> M_x := (u,v) -> B_x * (I_xx * u + I_xy * v + I_xt); + M_x := (u, v) -> B_x (I_xx u + I_xy v + I_xt) + +> M_y := (u,v) -> B_y * (I_xy * u + I_yy * v + I_yt); + M_y := (u, v) -> B_y (I_xy u + I_yy v + I_yt) + +> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), u); + 2 2 + 2 (I_xx u + I_xy v + I_xt) B_x I_xx + 2 B_y (I_xy u + I_yy v + I_yt) I_xy + --------------------------------------------------------------------------- + 2 2 2 2 1/2 + 2 ((I_xx u + I_xy v + I_xt) B_x + B_y (I_xy u + I_yy v + I_yt) + e) + +> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), v); + 2 2 + 2 (I_xx u + I_xy v + I_xt) B_x I_xy + 2 B_y (I_xy u + I_yy v + I_yt) I_yy + --------------------------------------------------------------------------- + 2 2 2 2 1/2 + 2 ((I_xx u + I_xy v + I_xt) B_x + B_y (I_xy u + I_yy v + I_yt) + e) + +Using the same fixed-point scheme where we hold the terms in the +denominator constant and equal to last iteration's values, we get +a new common constant + + k2 = 1 / sqrt(β_x² (I_xx u' + I_xy v' + I_xt)² + β_y² (I_xy u' + I_yy v' + I_yt)²) + +and for brevity + + k_x = k2 β_x² + k_y = k2 β_y² + +and thus, collecting terms for u and v, we get the two equations: + + (k_x I_xx² + k_y I_xy²) u + (k_x I_xx I_xy + k_y I_xy I_yy) v = - k_x I_xx I_xt - k_y I_xy I_yt + (k_x I_xx I_xy + k_y I_xy I_yy) u + (k_x I_xy² + k_y I_yy²) v = - k_x I_xy I_xt - k_y I_yy I_yt + +which is linear in u and v, not colinear (unless we are extremely +unlucky), and can be easily solved. + +Of course, for optimizing the weighted sum σ Ψ(E_I) + γ Ψ(E_G), +we just add the two equation sets pairwise with appropriate weights. + +There's a small discrepancy here: The equations suggest that we should +be be squaring the normalization terms β_0², β_x², β_y²; however, the +code does not appear to do so. It's possible that they were intended to be +added outside of the penalization, e.g. 
Ψ(a²) = sqrt(β a² + ε²), but given +that these come from [Zimmer11], which mentions nothing of the sort, +I'll just have to assume that this is an implementation mishap. + +The final smoothness term the one that binds the flow field together as a whole +so that we don't have WxH completely independent equations (with its positive +and negative sides, of course). It is the simplest in terms of notation, +but it requires the full power of the Euler-Lagrange equations to minimize, +so we'll need to figure that part out. + + E_S = abs(∇u)² + abs(∇v)² + +or + + E_S = (u_x² + u_y²) + (v_x² + v_y²) + +The penalized form used in the DeepFlow notation, contrary to what you'd expect +from the paper, is: + + E_S = sqrt(u_x² + u_y² + v_x² + v_y² + ε²) + +How would one go about to minimize such an expression by u? (We'll deal with v +later.) It's perhaps no big surprise that the expression involves double +derivatives, but the full form involves the Euler-Lagrange equations. +They allow us to minimize expressions that contain x, y, u(x, y) _and_ the partial +derivatives u_x(x, y) and u_y(x, y), although the answer becomes a differential +equation. + +The Wikipedia page is, unfortunately, suitable for scaring small +children, but the general idea is: Differentiate the expression by u_x +(yes, differentiating by a partial derivative!), negate it, and then +differentiate the result by x. Then do the same thing by u_y and y, +add the two results together and equate to zero. Mathematically +(https://en.wikipedia.org/wiki/Euler%E2%80%93Lagrange_equation#Several_functions_of_several_variables_with_single_derivative): + + ∂E/∂u - ∂/∂x (∂E/∂u_x) - ∂/∂y (∂E/∂u_y) = 0 + +The first term disappears, since we don't have a non-differentiated +u(x, y) in E_S. (Previously, the two _other_ terms would disappear, +because we didn't have u_x or u_y in E_I or E_G.) 
This means we get + + - ∂/∂x (u_x / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) - ∂/∂y (u_y / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) = 0 + +(We don't remove the minus signs since this is supposed to be added to +all the other terms.) + +This is what's called an _anisotropic diffusion_ (or Perona–Malik diffusion) +equation, and is extensively described in literature. It has the effect of +smoothing the flow more in some places than others; in particular, it does +not smooth as strongly near edges, so it is edge-preserving. (It's a bit odd to +call it anisotropic, since it does smooth equally in all directions; +[Brox05] calls it vector-valued diffusion.) + +We'd love to our usual trick of keeping the nonlinear terms in the denominator +constant, but alas, we can't do that yet, since it's under the differentiation +operator; this factor has to be discretized together with u before we can treat +it as a constant. So instead, we'll define it as a function (called the +_diffusivity_ at the given point): + + g(x, y) = -1 / sqrt(u_x² + u_y² + v_x² + v_y² + ε²) = 0 + +which gives us + + ∂/∂x ( g(x, y) u_x ) + ∂/∂y ( g(x, y) u_y ) = 0 + +We'll also have a similar equation for minimizing v, of course: + + ∂/∂x ( g(x, y) v_x ) + ∂/∂y ( g(x, y) v_y ) = 0 + +There's no normalization term β here, unlike the other terms; DeepFlow2 +adds one, but we're not including it here. + +At this point, we make a tweak. This seemingly goes back to at least +[Brox04], which also makes the same tweak to all the other terms +(which we don't, but see below). We split u (and v) into something +based on the original value plus a differential du (and dv), and then +solve for du (or dv) instead. (In math-speak, we are moving to an +implicit method, which is often more numerically stable.) In other words, + + u(x, y) = u0(x, y) + du(x, y) + +where u0(x, y) is the initial guess for the flow. (It's not the value +from previous iteration, for reasons that will be clear later, it's +the first one. 
[Brox04] differs here, but it does a number of things +differently in the numerics anyway.) + +This gives us: + + ∂/∂x ( g(x, y) (u0 + du)_x ) + ∂/∂y ( g(x, y) (u0 + du)_y ) = 0 + +or + + ∂/∂x ( g(x, y) du_x ) + ∂/∂y ( g(x, y) du_y ) = - ∂/∂x ( g(x, y) u0_x ) - ∂/∂y ( g(x, y) u0_y ) + +where the right-hand side is effectively a constant for these purposes +(although it still needs to be calculated anew for each iteration, +since g(x, y) changes). + +Of course, now we have a different problem; all the other terms are +formulated in terms of u and v, not du and dv. DeepFlow solves this +by not searching for the flow between I_0 and I_1, but between I_0 and +a pre-warped I_1. In other words, before any of the derivatives involving +I_t are calculated, we calculate an I_w with bilinear interpolation: + + I_w(x, y) = I_1(x + u0(x, y), y + v0(x, y)) + +and then redefine I_t (occasionally called I_z) as + + I_t(x, y) = I_w(x, y) - I_0(x, y) + +Note that the plus sign effectively means inverting the flow, so if +the u0 and v0 were already correctly estimated, perfectly smooth and linear +everywhere, I_w = I_0. (All spatial derivatives are calculated on the mean +between I_0 and I_w; the paper doesn't mention this.) After this, all the +equations for E_I and E_G earlier will still hold, they will just be +calculating du and dv instead. Note that this means we have three values +for the flow; there's u0 for the initial guess, du for the current guess +of delta from u0 (which makes u0 + du the current guess of the flow), +and du' for the previous guess of delta from u0. (The initial values for +du' and dv' will be zero.) 
+ +Now back to our equations, as we look at practical implementation: + + ∂/∂x ( g(x, y) du_x ) + ∂/∂y ( g(x, y) du_y ) = - ∂/∂x ( g(x, y) u0_x ) - ∂/∂y ( g(x, y) u0_y ) + ∂/∂x ( g(x, y) dv_x ) + ∂/∂y ( g(x, y) dv_y ) = - ∂/∂x ( g(x, y) v0_x ) - ∂/∂y ( g(x, y) v0_y ) + +We can discretize the left-hand and right-hand side identically, so let's +look only at + + ∂/∂x ( g(x, y) du_x ) + ∂/∂y ( g(x, y) du_y ) + +[Brox05] equation (2.14) (which refers to a 1998 book, although I couldn't +immediately find the equation in question in that book) discretizes this as + + 1/2 (g(x+1, y) + g(x, y)) (du(x+1, y) - du(x, y)) + - 1/2 (g(x-1, y) + g(x, y)) (du(x, y) - du(x-1, y)) + + 1/2 (g(x, y+1) + g(x, y)) (du(x, y+1) - du(x, y)) + - 1/2 (g(x, y-1) + g(x, y)) (du(x, y) - du(x, y-1)) + +It also mentions that it would be better to sample g at the half-way points, +e.g. g(x+0.5, y), but that begs the question exactly how we'd do that, and +DeepFlow doesn't seem to care, so we stick with their version. + +Now we can finally let g use the values of the flow (note that this is the +actual flow u and v, not du and dv!) from the previous iteration, as before: + + g(x, y) = -1 / sqrt(u'_x² + u'_y² + v'_x² + v'_y² + ε²) = 0 + +The single derivatives in g(x) are approximated by standard central differences +(see https://en.wikipedia.org/wiki/Finite_difference_coefficient), e.g. + + u_x(x, y) = 1/2 (u(x + 1, y) - u(x - 1, y)) + +although the derivatives of I are using the fancier + + I_x(x, y) = 1/12 (-I(x - 2, y) + 8 I(x - 1, y) - 8 I(x - 1, y) + I(x - 2, y)) + +I assume this is because I_x derivatives are calculated only once, so we can +afford more accurate derivatives (or possibly simply because of influence +from earlier papers). 
+ +Let's now define a smoothness constant between the neighbors (x,y) and (x1,y1): + + s(x1, y1) = 1/2 (g(x, y) + g(x1, y1)) + +Collecting all the du(x, y) terms of the discretized equation above, +ignoring the right-hand side, which is just a constant for us anyway: + + s(x+1, y) (du(x+1, y) - du(x, y)) + - s(x-1, y) (du(x, y) - du(x-1, y)) + + s(x, y+1) (du(x, y+1) - du(x, y)) + - s(x, y-1) (du(x, y) - du(x, y-1)) = C + + s(x+1, y) du(x+1, y) - s(x+1, y) du(x, y) + - s(x-1, y) du(x, y) + s(x-1, y) du(x-1, y) + + s(x, y+1) du(x, y+1) - s(x, y+1) du(x, y) + - s(x, y-1) du(x, y) + s(x, y-1) du(x, y-1) = C + + - (s(x+1, y) + s(x-1, y) + s(x, y+1) + s(x, y-1)) du(x, y) = + - s(x+1, y) du(x+1, y) - s(x-1, y) du(x-1, y) - s(x, y+1) du(x, y+1) - s(x, y-1) du(x, y-1) + C + +It is interesting to note that if s = 1 uniformly, which would be the case +without our penalizer Ψ(a²), we would have the familiar discrete Laplacian, +where du(x, y) would seek to simply become the average of its four immediate +neighbors. + +Now our equation system is finally complete and linear, and the rest is +fairly pedestrian. The last term connects all the unknowns together, +but we still solve them mostly as 2x2 matrices. The most basic iterative +method is Gauss-Seidel, where we solve du(x, y) and dv(x,y) using the +previous iteration's value for all other du/dv values. (That this converges +at all it beyond this text to prove, but it does. Not that we bother +iterating until it converges; a few iterations is good enough.) +Jacobi iterations improve on this in that (surprisingly!) using this +iteration's computed du/dv values if they're ready; this improves convergence, +but is hard to parallelize. 
On the GPU, we render to the same texture as +we render from; as per the OpenGL spec, this will give us undefined behavior +on read (since our read/write sets are neither identical nor disjoint), +but in practice, we'll get either the old value (Gauss-Seidel) or the +new one (Jacobi); more likely, the former. + +Successive over-relaxation (SOR) improves further on this, in that it +assumes that the solution moves towards the right value, so why not +just go a bit further? That is, if Jacobi would tell you to increase +the flow by 1.0 pixel to the right, perhaps go 1.5 pixels to the right +instead (this value is called ω). Again, the convergence proof is beyond the +scope here, but SOR converges for any ω between 1 and 2 (1 gives plain +Jacobi, and over 2, we risk overshooting and never converging). Optimal +ω depends on the equation system; DIS uses ω = 1.6, which presumably was +measured, and should be OK for us too, even if we are closer to Gauss-Seidel +than to Jacobi. + +Do note that the DeepFlow code does not fully use SOR or even Gauss-Seidel; +it solves every 2x2 block (ie., single du/dv pair) using Cramer's rule, +and then pushes that vector 80% further, SOR-style. This would be clearly +more accurate if we didn't have SOR in the mix (since du and dv would +converge immediately relative to each other, bar Cramer's numerical issues), +but I'm not sure whether it's better given SOR. (DIS changes this to a more +traditional SOR formulation, which we also use.) + +And that's it. 
References: + +[Brox04]: Brox, Bruhn, Papenberg, Weickert: “High Accuracy Optical Flow + Estimation Based on a Theory for Warping”, in Proceedings of the European + Conference on Computer Vision (ECCV), 2004 +[Brox05]: Brox: “From Pixels to Regions: Partial Differential Equations in + Image Analysis”, PhD thesis, 2005 +[Brox11]: Brox, Malik: “Large Displacement Optical Flow: Descriptor Matching in + Variational Motion Estimation”, IEEE Transactions on Pattern Analysis and + Machine Intelligence, 2011 +[Ciarlet78]: Ciarlet: “The Finite Element Method for Elliptic Problems”, 1978 +[Fahad07]: Fahad, Morris: “Multiple Combined Constraints for Optical Flow + Estimation”, in Proceedings of the 3rd International Conference on Advances + in Visual Computing (ISVC), 2007 +[Kroeger05]: Kroeger, Timofte, Dai, van Gool: “Fast Optical Flow using Dense + Inverse Search”, in Proceedings of the European Conference on Computer Vision + (ECCV), 2016 +[Weinzaepfel13]: Weinzaepfel, Revaud, Harchaoui, Schmid: “DeepFlow: Large + displacement optical flow with deep matching”, in IEEE International Conference + on Computer Vision (ICCV), 2013 +[Zimmer11]: Zimmer, Bruhn, Weickert: “Optic Flow in Harmony”, International + Journal of Computer Vision, 2011 -- 2.39.5