]> git.sesse.net Git - nageru/blob - futatabi/flow.cpp
Log a warning when we kill a client that is not keeping up.
[nageru] / futatabi / flow.cpp
1 #define NO_SDL_GLEXT 1
2
3 #include "flow.h"
4
5 #include "embedded_files.h"
6 #include "gpu_timers.h"
7 #include "shared/read_file.h"
8 #include "util.h"
9
10 #include <algorithm>
11 #include <assert.h>
12 #include <deque>
13 #include <dlfcn.h>
14 #include <epoxy/gl.h>
15 #include <map>
16 #include <memory>
17 #include <stack>
18 #include <stdio.h>
19 #include <string.h>
20 #include <unistd.h>
21 #include <vector>
22
23 #define BUFFER_OFFSET(i) ((char *)nullptr + (i))
24
25 using namespace std;
26
27 // Weighting constants for the different parts of the variational refinement.
28 // These don't correspond 1:1 to the values given in the DIS paper,
29 // since we have different normalizations and ranges in some cases.
30 // These are found through a simple grid search on some MPI-Sintel data,
31 // although the error (EPE) seems to be fairly insensitive to the precise values.
32 // Only the relative values matter, so we fix alpha (the smoothness constant)
33 // at unity and tweak the others.
34 //
35 // TODO: Maybe this should not be global.
// The smoothness weight; held at unity, since only the ratios between
// the three weights matter.
float vr_alpha = 1.0f;
float vr_delta = 0.25f;
float vr_gamma = 0.25f;
37
38 // Some global OpenGL objects.
39 // TODO: These should really be part of DISComputeFlow.
40 GLuint nearest_sampler, linear_sampler, zero_border_sampler;
41 GLuint vertex_vbo;
42
// Counts how many levels you get by halving both dimensions (rounding down)
// until neither of them is larger than one. The starting size itself counts
// as the first level.
int find_num_levels(int width, int height)
{
        int num_levels = 1;
        int w = width;
        int h = height;
        while (w > 1 || h > 1) {
                w >>= 1;
                h >>= 1;
                ++num_levels;
        }
        return num_levels;
}
53
54 GLuint compile_shader(const string &shader_src, GLenum type)
55 {
56         GLuint obj = glCreateShader(type);
57         const GLchar *source[] = { shader_src.data() };
58         const GLint length[] = { (GLint)shader_src.size() };
59         glShaderSource(obj, 1, source, length);
60         glCompileShader(obj);
61
62         GLchar info_log[4096];
63         GLsizei log_length = sizeof(info_log) - 1;
64         glGetShaderInfoLog(obj, log_length, &log_length, info_log);
65         info_log[log_length] = 0;
66         if (strlen(info_log) > 0) {
67                 fprintf(stderr, "Shader compile log: %s\n", info_log);
68         }
69
70         GLint status;
71         glGetShaderiv(obj, GL_COMPILE_STATUS, &status);
72         if (status == GL_FALSE) {
73                 // Add some line numbers to easier identify compile errors.
74                 string src_with_lines = "/*   1 */ ";
75                 size_t lineno = 1;
76                 for (char ch : shader_src) {
77                         src_with_lines.push_back(ch);
78                         if (ch == '\n') {
79                                 char buf[32];
80                                 snprintf(buf, sizeof(buf), "/* %3zu */ ", ++lineno);
81                                 src_with_lines += buf;
82                         }
83                 }
84
85                 fprintf(stderr, "Failed to compile shader:\n%s\n", src_with_lines.c_str());
86                 abort();
87         }
88
89         return obj;
90 }
91
92 GLuint link_program(GLuint vs_obj, GLuint fs_obj)
93 {
94         GLuint program = glCreateProgram();
95         glAttachShader(program, vs_obj);
96         glAttachShader(program, fs_obj);
97         glLinkProgram(program);
98         GLint success;
99         glGetProgramiv(program, GL_LINK_STATUS, &success);
100         if (success == GL_FALSE) {
101                 GLchar error_log[1024] = { 0 };
102                 glGetProgramInfoLog(program, 1024, nullptr, error_log);
103                 fprintf(stderr, "Error linking program: %s\n", error_log);
104                 abort();
105         }
106         return program;
107 }
108
109 void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler)
110 {
111         if (location == -1) {
112                 return;
113         }
114
115         glBindTextureUnit(texture_unit, tex);
116         glBindSampler(texture_unit, sampler);
117         glProgramUniform1i(program, location, texture_unit);
118 }
119
120 template<size_t num_elements>
121 void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements> &textures)
122 {
123         auto it = fbos.find(textures);
124         if (it != fbos.end()) {
125                 glBindFramebuffer(GL_FRAMEBUFFER, it->second);
126                 return;
127         }
128
129         GLuint fbo;
130         glCreateFramebuffers(1, &fbo);
131         GLenum bufs[num_elements];
132         for (size_t i = 0; i < num_elements; ++i) {
133                 glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
134                 bufs[i] = GL_COLOR_ATTACHMENT0 + i;
135         }
136         glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
137
138         fbos[textures] = fbo;
139         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
140 }
141
142 template<size_t num_elements>
143 void PersistentFBOSetWithDepth<num_elements>::render_to(GLuint depth_rb, const array<GLuint, num_elements> &textures)
144 {
145         auto key = make_pair(depth_rb, textures);
146
147         auto it = fbos.find(key);
148         if (it != fbos.end()) {
149                 glBindFramebuffer(GL_FRAMEBUFFER, it->second);
150                 return;
151         }
152
153         GLuint fbo;
154         glCreateFramebuffers(1, &fbo);
155         GLenum bufs[num_elements];
156         glNamedFramebufferRenderbuffer(fbo, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rb);
157         for (size_t i = 0; i < num_elements; ++i) {
158                 glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
159                 bufs[i] = GL_COLOR_ATTACHMENT0 + i;
160         }
161         glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
162
163         fbos[key] = fbo;
164         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
165 }
166
167 GrayscaleConversion::GrayscaleConversion()
168 {
169         gray_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
170         gray_fs_obj = compile_shader(read_file("gray.frag", _binary_gray_frag_data, _binary_gray_frag_size), GL_FRAGMENT_SHADER);
171         gray_program = link_program(gray_vs_obj, gray_fs_obj);
172
173         // Set up the VAO containing all the required position/texcoord data.
174         glCreateVertexArrays(1, &gray_vao);
175         glBindVertexArray(gray_vao);
176
177         GLint position_attrib = glGetAttribLocation(gray_program, "position");
178         glEnableVertexArrayAttrib(gray_vao, position_attrib);
179         glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
180
181         uniform_tex = glGetUniformLocation(gray_program, "tex");
182 }
183
184 void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, int num_layers)
185 {
186         glUseProgram(gray_program);
187         bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);
188
189         glViewport(0, 0, width, height);
190         fbos.render_to(gray_tex);
191         glBindVertexArray(gray_vao);
192         glDisable(GL_BLEND);
193         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
194 }
195
196 Sobel::Sobel()
197 {
198         sobel_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
199         sobel_fs_obj = compile_shader(read_file("sobel.frag", _binary_sobel_frag_data, _binary_sobel_frag_size), GL_FRAGMENT_SHADER);
200         sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
201
202         uniform_tex = glGetUniformLocation(sobel_program, "tex");
203 }
204
205 void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers)
206 {
207         glUseProgram(sobel_program);
208         bind_sampler(sobel_program, uniform_tex, 0, tex_view, nearest_sampler);
209
210         glViewport(0, 0, level_width, level_height);
211         fbos.render_to(grad_tex);
212         glDisable(GL_BLEND);
213         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
214 }
215
216 MotionSearch::MotionSearch(const OperatingPoint &op)
217         : op(op)
218 {
219         motion_vs_obj = compile_shader(read_file("motion_search.vert", _binary_motion_search_vert_data, _binary_motion_search_vert_size), GL_VERTEX_SHADER);
220         motion_fs_obj = compile_shader(read_file("motion_search.frag", _binary_motion_search_frag_data, _binary_motion_search_frag_size), GL_FRAGMENT_SHADER);
221         motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
222
223         uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
224         uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
225         uniform_out_flow_size = glGetUniformLocation(motion_search_program, "out_flow_size");
226         uniform_image_tex = glGetUniformLocation(motion_search_program, "image_tex");
227         uniform_grad_tex = glGetUniformLocation(motion_search_program, "grad_tex");
228         uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex");
229         uniform_patch_size = glGetUniformLocation(motion_search_program, "patch_size");
230         uniform_num_iterations = glGetUniformLocation(motion_search_program, "num_iterations");
231 }
232
233 void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers)
234 {
235         glUseProgram(motion_search_program);
236
237         bind_sampler(motion_search_program, uniform_image_tex, 0, tex_view, linear_sampler);
238         bind_sampler(motion_search_program, uniform_grad_tex, 1, grad_tex, nearest_sampler);
239         bind_sampler(motion_search_program, uniform_flow_tex, 2, flow_tex, linear_sampler);
240
241         glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
242         glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
243         glProgramUniform2f(motion_search_program, uniform_out_flow_size, width_patches, height_patches);
244         glProgramUniform1ui(motion_search_program, uniform_patch_size, op.patch_size_pixels);
245         glProgramUniform1ui(motion_search_program, uniform_num_iterations, op.search_iterations);
246
247         glViewport(0, 0, width_patches, height_patches);
248         fbos.render_to(flow_out_tex);
249         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
250 }
251
252 Densify::Densify(const OperatingPoint &op)
253         : op(op)
254 {
255         densify_vs_obj = compile_shader(read_file("densify.vert", _binary_densify_vert_data, _binary_densify_vert_size), GL_VERTEX_SHADER);
256         densify_fs_obj = compile_shader(read_file("densify.frag", _binary_densify_frag_data, _binary_densify_frag_size), GL_FRAGMENT_SHADER);
257         densify_program = link_program(densify_vs_obj, densify_fs_obj);
258
259         uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
260         uniform_image_tex = glGetUniformLocation(densify_program, "image_tex");
261         uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
262 }
263
264 void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers)
265 {
266         glUseProgram(densify_program);
267
268         bind_sampler(densify_program, uniform_image_tex, 0, tex_view, linear_sampler);
269         bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
270
271         glProgramUniform2f(densify_program, uniform_patch_size,
272                            float(op.patch_size_pixels) / level_width,
273                            float(op.patch_size_pixels) / level_height);
274
275         glViewport(0, 0, level_width, level_height);
276         glEnable(GL_BLEND);
277         glBlendFunc(GL_ONE, GL_ONE);
278         fbos.render_to(dense_flow_tex);
279         glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
280         glClear(GL_COLOR_BUFFER_BIT);
281         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches * num_layers);
282 }
283
284 Prewarp::Prewarp()
285 {
286         prewarp_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
287         prewarp_fs_obj = compile_shader(read_file("prewarp.frag", _binary_prewarp_frag_data, _binary_prewarp_frag_size), GL_FRAGMENT_SHADER);
288         prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj);
289
290         uniform_image_tex = glGetUniformLocation(prewarp_program, "image_tex");
291         uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
292 }
293
294 void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height, int num_layers)
295 {
296         glUseProgram(prewarp_program);
297
298         bind_sampler(prewarp_program, uniform_image_tex, 0, tex_view, linear_sampler);
299         bind_sampler(prewarp_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
300
301         glViewport(0, 0, level_width, level_height);
302         glDisable(GL_BLEND);
303         fbos.render_to(I_tex, I_t_tex, normalized_flow_tex);
304         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
305 }
306
307 Derivatives::Derivatives()
308 {
309         derivatives_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
310         derivatives_fs_obj = compile_shader(read_file("derivatives.frag", _binary_derivatives_frag_data, _binary_derivatives_frag_size), GL_FRAGMENT_SHADER);
311         derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj);
312
313         uniform_tex = glGetUniformLocation(derivatives_program, "tex");
314 }
315
316 void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers)
317 {
318         glUseProgram(derivatives_program);
319
320         bind_sampler(derivatives_program, uniform_tex, 0, input_tex, nearest_sampler);
321
322         glViewport(0, 0, level_width, level_height);
323         glDisable(GL_BLEND);
324         fbos.render_to(I_x_y_tex, beta_0_tex);
325         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
326 }
327
328 ComputeDiffusivity::ComputeDiffusivity()
329 {
330         diffusivity_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
331         diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag", _binary_diffusivity_frag_data, _binary_diffusivity_frag_size), GL_FRAGMENT_SHADER);
332         diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj);
333
334         uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex");
335         uniform_diff_flow_tex = glGetUniformLocation(diffusivity_program, "diff_flow_tex");
336         uniform_alpha = glGetUniformLocation(diffusivity_program, "alpha");
337         uniform_zero_diff_flow = glGetUniformLocation(diffusivity_program, "zero_diff_flow");
338 }
339
340 void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
341 {
342         glUseProgram(diffusivity_program);
343
344         bind_sampler(diffusivity_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
345         bind_sampler(diffusivity_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler);
346         glProgramUniform1f(diffusivity_program, uniform_alpha, vr_alpha);
347         glProgramUniform1i(diffusivity_program, uniform_zero_diff_flow, zero_diff_flow);
348
349         glViewport(0, 0, level_width, level_height);
350
351         glDisable(GL_BLEND);
352         fbos.render_to(diffusivity_tex);
353         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
354 }
355
356 SetupEquations::SetupEquations()
357 {
358         equations_vs_obj = compile_shader(read_file("equations.vert", _binary_equations_vert_data, _binary_equations_vert_size), GL_VERTEX_SHADER);
359         equations_fs_obj = compile_shader(read_file("equations.frag", _binary_equations_frag_data, _binary_equations_frag_size), GL_FRAGMENT_SHADER);
360         equations_program = link_program(equations_vs_obj, equations_fs_obj);
361
362         uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex");
363         uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex");
364         uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex");
365         uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex");
366         uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex");
367         uniform_diffusivity_tex = glGetUniformLocation(equations_program, "diffusivity_tex");
368         uniform_gamma = glGetUniformLocation(equations_program, "gamma");
369         uniform_delta = glGetUniformLocation(equations_program, "delta");
370         uniform_zero_diff_flow = glGetUniformLocation(equations_program, "zero_diff_flow");
371 }
372
373 void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
374 {
375         glUseProgram(equations_program);
376
377         bind_sampler(equations_program, uniform_I_x_y_tex, 0, I_x_y_tex, nearest_sampler);
378         bind_sampler(equations_program, uniform_I_t_tex, 1, I_t_tex, nearest_sampler);
379         bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
380         bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
381         bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
382         bind_sampler(equations_program, uniform_diffusivity_tex, 5, diffusivity_tex, zero_border_sampler);
383         glProgramUniform1f(equations_program, uniform_delta, vr_delta);
384         glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
385         glProgramUniform1i(equations_program, uniform_zero_diff_flow, zero_diff_flow);
386
387         glViewport(0, 0, (level_width + 1) / 2, level_height);
388         glDisable(GL_BLEND);
389         fbos.render_to(equation_red_tex, equation_black_tex);
390         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
391 }
392
393 SOR::SOR()
394 {
395         sor_vs_obj = compile_shader(read_file("sor.vert", _binary_sor_vert_data, _binary_sor_vert_size), GL_VERTEX_SHADER);
396         sor_fs_obj = compile_shader(read_file("sor.frag", _binary_sor_frag_data, _binary_sor_frag_size), GL_FRAGMENT_SHADER);
397         sor_program = link_program(sor_vs_obj, sor_fs_obj);
398
399         uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
400         uniform_equation_red_tex = glGetUniformLocation(sor_program, "equation_red_tex");
401         uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex");
402         uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex");
403         uniform_phase = glGetUniformLocation(sor_program, "phase");
404         uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases");
405 }
406
407 void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer)
408 {
409         glUseProgram(sor_program);
410
411         bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
412         bind_sampler(sor_program, uniform_diffusivity_tex, 1, diffusivity_tex, zero_border_sampler);
413         bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler);
414         bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler);
415
416         if (!zero_diff_flow) {
417                 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
418         }
419
420         // NOTE: We bind to the texture we are rendering from, but we never write any value
421         // that we read in the same shader pass (we call discard for red values when we compute
422         // black, and vice versa), and we have barriers between the passes, so we're fine
423         // as per the spec.
424         glViewport(0, 0, level_width, level_height);
425         glDisable(GL_BLEND);
426         fbos.render_to(diff_flow_tex);
427
428         for (int i = 0; i < num_iterations; ++i) {
429                 {
430                         ScopedTimer timer("Red pass", sor_timer);
431                         if (zero_diff_flow && i == 0) {
432                                 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0);
433                         }
434                         glProgramUniform1i(sor_program, uniform_phase, 0);
435                         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
436                         glTextureBarrier();
437                 }
438                 {
439                         ScopedTimer timer("Black pass", sor_timer);
440                         if (zero_diff_flow && i == 0) {
441                                 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1);
442                         }
443                         glProgramUniform1i(sor_program, uniform_phase, 1);
444                         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
445                         if (zero_diff_flow && i == 0) {
446                                 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
447                         }
448                         if (i != num_iterations - 1) {
449                                 glTextureBarrier();
450                         }
451                 }
452         }
453 }
454
455 AddBaseFlow::AddBaseFlow()
456 {
457         add_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
458         add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag", _binary_add_base_flow_frag_data, _binary_add_base_flow_frag_size), GL_FRAGMENT_SHADER);
459         add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
460
461         uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
462 }
463
464 void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers)
465 {
466         glUseProgram(add_flow_program);
467
468         bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
469
470         glViewport(0, 0, level_width, level_height);
471         glEnable(GL_BLEND);
472         glBlendFunc(GL_ONE, GL_ONE);
473         fbos.render_to(base_flow_tex);
474
475         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
476 }
477
478 ResizeFlow::ResizeFlow()
479 {
480         resize_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
481         resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag", _binary_resize_flow_frag_data, _binary_resize_flow_frag_size), GL_FRAGMENT_SHADER);
482         resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
483
484         uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
485         uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
486 }
487
488 void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers)
489 {
490         glUseProgram(resize_flow_program);
491
492         bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
493
494         glProgramUniform2f(resize_flow_program, uniform_scale_factor, float(output_width) / input_width, float(output_height) / input_height);
495
496         glViewport(0, 0, output_width, output_height);
497         glDisable(GL_BLEND);
498         fbos.render_to(out_tex);
499
500         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
501 }
502
503 DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
504         : width(width), height(height), op(op), motion_search(op), densify(op)
505 {
506         // Make some samplers.
507         glCreateSamplers(1, &nearest_sampler);
508         glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
509         glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
510         glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
511         glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
512
513         glCreateSamplers(1, &linear_sampler);
514         glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
515         glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
516         glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
517         glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
518
519         // The smoothness is sampled so that once we get to a smoothness involving
520         // a value outside the border, the diffusivity between the two becomes zero.
521         // Similarly, gradients are zero outside the border, since the edge is taken
522         // to be constant.
523         glCreateSamplers(1, &zero_border_sampler);
524         glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
525         glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
526         glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
527         glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
528         float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };  // Note that zero alpha means we can also see whether we sampled outside the border or not.
529         glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
530
531         // Initial flow is zero, 1x1.
532         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &initial_flow_tex);
533         glTextureStorage3D(initial_flow_tex, 1, GL_RG16F, 1, 1, 1);
534         glClearTexImage(initial_flow_tex, 0, GL_RG, GL_FLOAT, nullptr);
535
536         // Set up the vertex data that will be shared between all passes.
537         float vertices[] = {
538                 0.0f, 1.0f,
539                 0.0f, 0.0f,
540                 1.0f, 1.0f,
541                 1.0f, 0.0f,
542         };
543         glCreateBuffers(1, &vertex_vbo);
544         glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
545
546         glCreateVertexArrays(1, &vao);
547         glBindVertexArray(vao);
548         glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
549
550         GLint position_attrib = 0;  // Hard-coded in every vertex shader.
551         glEnableVertexArrayAttrib(vao, position_attrib);
552         glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
553 }
554
555 GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy)
556 {
557         int num_layers = (flow_direction == FORWARD_AND_BACKWARD) ? 2 : 1;
558         int prev_level_width = 1, prev_level_height = 1;
559         GLuint prev_level_flow_tex = initial_flow_tex;
560
561         GPUTimers timers;
562
563         glBindVertexArray(vao);
564         glDisable(GL_DITHER);
565
566         ScopedTimer total_timer("Compute flow", &timers);
567         for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
568                 char timer_name[256];
569                 snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
570                 ScopedTimer level_timer(timer_name, &total_timer);
571
572                 int level_width = width >> level;
573                 int level_height = height >> level;
574                 float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
575
576                 // Make sure we have patches at least every Nth pixel, e.g. for width=9
577                 // and patch_spacing=3 (the default), we put out patch centers in
578                 // x=0, x=3, x=6, x=9, which is four patches. The fragment shader will
579                 // lock all the centers to integer coordinates if needed.
580                 int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
581                 int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
582
583                 // Make sure we always read from the correct level; the chosen
584                 // mipmapping could otherwise be rather unpredictable, especially
585                 // during motion search.
586                 GLuint tex_view;
587                 glGenTextures(1, &tex_view);
588                 glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, tex, GL_R8, level, 1, 0, 2);
589
590                 // Create a new texture to hold the gradients.
591                 GLuint grad_tex = pool.get_texture(GL_R32UI, level_width, level_height, num_layers);
592
593                 // Find the derivative.
594                 {
595                         ScopedTimer timer("Sobel", &level_timer);
596                         sobel.exec(tex_view, grad_tex, level_width, level_height, num_layers);
597                 }
598
599                 // Motion search to find the initial flow. We use the flow from the previous
600                 // level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
601
602                 // Create an output flow texture.
603                 GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches, num_layers);
604
605                 // And draw.
606                 {
607                         ScopedTimer timer("Motion search", &level_timer);
608                         motion_search.exec(tex_view, grad_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches, num_layers);
609                 }
610                 pool.release_texture(grad_tex);
611
612                 // Densification.
613
614                 // Set up an output texture (cleared in Densify).
615                 GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height, num_layers);
616
617                 // And draw.
618                 {
619                         ScopedTimer timer("Densification", &level_timer);
620                         densify.exec(tex_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches, num_layers);
621                 }
622                 pool.release_texture(flow_out_tex);
623
624                 // Everything below here in the loop belongs to variational refinement.
625                 ScopedTimer varref_timer("Variational refinement", &level_timer);
626
627                 // Prewarping; create I and I_t, and a normalized base flow (so we don't
628                 // have to normalize it over and over again, and also save some bandwidth).
629                 //
630                 // During the entire rest of the variational refinement, flow will be measured
631                 // in pixels, not 0..1 normalized OpenGL texture coordinates.
632                 // This is because variational refinement depends so heavily on derivatives,
633                 // which are measured in intensity levels per pixel.
634                 GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
635                 GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
636                 GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
637                 {
638                         ScopedTimer timer("Prewarping", &varref_timer);
639                         prewarp.exec(tex_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height, num_layers);
640                 }
641                 pool.release_texture(dense_flow_tex);
642                 glDeleteTextures(1, &tex_view);
643
644                 // TODO: If we don't have variational refinement, we don't need I and I_t,
645                 // so computing them is a waste.
646                 if (op.variational_refinement) {
647                         // Calculate I_x and I_y. We're only calculating first derivatives;
648                         // the others will be taken on-the-fly in order to sample from fewer
649                         // textures overall, since sampling from the L1 cache is cheap.
650                         // (TODO: Verify that this is indeed faster than making separate
651                         // double-derivative textures.)
652                         GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
653                         GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
654                         {
655                                 ScopedTimer timer("First derivatives", &varref_timer);
656                                 derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height, num_layers);
657                         }
658                         pool.release_texture(I_tex);
659
660                         // We need somewhere to store du and dv (the flow increment, relative
661                         // to the non-refined base flow u0 and v0). It's initially garbage,
662                         // but not read until we've written something sane to it.
663                         GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
664
665                         // And for diffusivity.
666                         GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
667
668                         // And finally for the equation set. See SetupEquations for
669                         // the storage format.
670                         GLuint equation_red_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
671                         GLuint equation_black_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
672
673                         for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
674                                 // Calculate the diffusivity term for each pixel.
675                                 {
676                                         ScopedTimer timer("Compute diffusivity", &varref_timer);
677                                         compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0, num_layers);
678                                 }
679
680                                 // Set up the 2x2 equation system for each pixel.
681                                 {
682                                         ScopedTimer timer("Set up equations", &varref_timer);
683                                         setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0, num_layers);
684                                 }
685
686                                 // Run a few SOR iterations. Note that these are to/from the same texture.
687                                 {
688                                         ScopedTimer timer("SOR", &varref_timer);
689                                         sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, num_layers, &timer);
690                                 }
691                         }
692
693                         pool.release_texture(I_t_tex);
694                         pool.release_texture(I_x_y_tex);
695                         pool.release_texture(beta_0_tex);
696                         pool.release_texture(diffusivity_tex);
697                         pool.release_texture(equation_red_tex);
698                         pool.release_texture(equation_black_tex);
699
700                         // Add the differential flow found by the variational refinement to the base flow,
701                         // giving the final flow estimate for this level.
702                         // The output is in base_flow_tex; we don't need to make a new texture.
703                         {
704                                 ScopedTimer timer("Add differential flow", &varref_timer);
705                                 add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
706                         }
707                         pool.release_texture(diff_flow_tex);
708                 }
709
710                 if (prev_level_flow_tex != initial_flow_tex) {
711                         pool.release_texture(prev_level_flow_tex);
712                 }
713                 prev_level_flow_tex = base_flow_tex;
714                 prev_level_width = level_width;
715                 prev_level_height = level_height;
716         }
717         total_timer.end();
718
719         if (!in_warmup) {
720                 timers.print();
721         }
722
723         // Scale up the flow to the final size (if needed).
724         if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
725                 return prev_level_flow_tex;
726         } else {
727                 GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
728                 resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height, num_layers);
729                 pool.release_texture(prev_level_flow_tex);
730                 return final_tex;
731         }
732 }
733
734 Splat::Splat(const OperatingPoint &op)
735         : op(op)
736 {
737         splat_vs_obj = compile_shader(read_file("splat.vert", _binary_splat_vert_data, _binary_splat_vert_size), GL_VERTEX_SHADER);
738         splat_fs_obj = compile_shader(read_file("splat.frag", _binary_splat_frag_data, _binary_splat_frag_size), GL_FRAGMENT_SHADER);
739         splat_program = link_program(splat_vs_obj, splat_fs_obj);
740
741         uniform_splat_size = glGetUniformLocation(splat_program, "splat_size");
742         uniform_alpha = glGetUniformLocation(splat_program, "alpha");
743         uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex");
744         uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex");
745         uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size");
746 }
747
748 void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
749 {
750         glUseProgram(splat_program);
751
752         bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler);
753         bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
754
755         glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
756         glProgramUniform1f(splat_program, uniform_alpha, alpha);
757         glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
758
759         glViewport(0, 0, width, height);
760         glDisable(GL_BLEND);
761         glEnable(GL_DEPTH_TEST);
762         glDepthMask(GL_TRUE);
763         glDepthFunc(GL_LESS);  // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.)
764
765         fbos.render_to(depth_rb, flow_tex);
766
767         // Evidently NVIDIA doesn't use fast clears for glClearTexImage, so clear now that
768         // we've got it bound.
769         glClearColor(1000.0f, 1000.0f, 0.0f, 1.0f);  // Invalid flow.
770         glClearDepth(1.0f);  // Effectively infinity.
771         glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
772
773         glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height * 2);
774
775         glDisable(GL_DEPTH_TEST);
776 }
777
778 HoleFill::HoleFill()
779 {
780         fill_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);
781         fill_fs_obj = compile_shader(read_file("hole_fill.frag", _binary_hole_fill_frag_data, _binary_hole_fill_frag_size), GL_FRAGMENT_SHADER);
782         fill_program = link_program(fill_vs_obj, fill_fs_obj);
783
784         uniform_tex = glGetUniformLocation(fill_program, "tex");
785         uniform_z = glGetUniformLocation(fill_program, "z");
786         uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset");
787 }
788
789 void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
790 {
791         glUseProgram(fill_program);
792
793         bind_sampler(fill_program, uniform_tex, 0, flow_tex, nearest_sampler);
794
795         glProgramUniform1f(fill_program, uniform_z, 1.0f - 1.0f / 1024.0f);
796
797         glViewport(0, 0, width, height);
798         glDisable(GL_BLEND);
799         glEnable(GL_DEPTH_TEST);
800         glDepthFunc(GL_LESS);  // Only update the values > 0.999f (ie., only invalid pixels).
801
802         fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
803
804         // Fill holes from the left, by shifting 1, 2, 4, 8, etc. pixels to the right.
805         for (int offs = 1; offs < width; offs *= 2) {
806                 glProgramUniform2f(fill_program, uniform_sample_offset, -offs / float(width), 0.0f);
807                 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
808                 glTextureBarrier();
809         }
810         glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[0], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
811
812         // Similar to the right; adjust Z a bit down, so that we re-fill the pixels that
813         // were overwritten in the last algorithm.
814         glProgramUniform1f(fill_program, uniform_z, 1.0f - 2.0f / 1024.0f);
815         for (int offs = 1; offs < width; offs *= 2) {
816                 glProgramUniform2f(fill_program, uniform_sample_offset, offs / float(width), 0.0f);
817                 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
818                 glTextureBarrier();
819         }
820         glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[1], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
821
822         // Up.
823         glProgramUniform1f(fill_program, uniform_z, 1.0f - 3.0f / 1024.0f);
824         for (int offs = 1; offs < height; offs *= 2) {
825                 glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, -offs / float(height));
826                 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
827                 glTextureBarrier();
828         }
829         glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[2], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
830
831         // Down.
832         glProgramUniform1f(fill_program, uniform_z, 1.0f - 4.0f / 1024.0f);
833         for (int offs = 1; offs < height; offs *= 2) {
834                 glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, offs / float(height));
835                 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
836                 glTextureBarrier();
837         }
838
839         glDisable(GL_DEPTH_TEST);
840 }
841
842 HoleBlend::HoleBlend()
843 {
844         blend_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);  // Reuse the vertex shader from the fill.
845         blend_fs_obj = compile_shader(read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size), GL_FRAGMENT_SHADER);
846         blend_program = link_program(blend_vs_obj, blend_fs_obj);
847
848         uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
849         uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
850         uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
851         uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
852         uniform_z = glGetUniformLocation(blend_program, "z");
853         uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
854 }
855
856 void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
857 {
858         glUseProgram(blend_program);
859
860         bind_sampler(blend_program, uniform_left_tex, 0, temp_tex[0], nearest_sampler);
861         bind_sampler(blend_program, uniform_right_tex, 1, temp_tex[1], nearest_sampler);
862         bind_sampler(blend_program, uniform_up_tex, 2, temp_tex[2], nearest_sampler);
863         bind_sampler(blend_program, uniform_down_tex, 3, flow_tex, nearest_sampler);
864
865         glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
866         glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
867
868         glViewport(0, 0, width, height);
869         glDisable(GL_BLEND);
870         glEnable(GL_DEPTH_TEST);
871         glDepthFunc(GL_LEQUAL);  // Skip over all of the pixels that were never holes to begin with.
872
873         fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
874
875         glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
876
877         glDisable(GL_DEPTH_TEST);
878 }
879
880 Blend::Blend(bool split_ycbcr_output)
881         : split_ycbcr_output(split_ycbcr_output)
882 {
883         string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size);
884         if (split_ycbcr_output) {
885                 // Insert after the first #version line.
886                 size_t offset = frag_shader.find('\n');
887                 assert(offset != string::npos);
888                 frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1);
889         }
890
891         blend_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
892         blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
893         blend_program = link_program(blend_vs_obj, blend_fs_obj);
894
895         uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
896         uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
897         uniform_alpha = glGetUniformLocation(blend_program, "alpha");
898         uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
899 }
900
901 void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
902 {
903         glUseProgram(blend_program);
904         bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
905         bind_sampler(blend_program, uniform_flow_tex, 1, flow_tex, linear_sampler);  // May be upsampled.
906         glProgramUniform1f(blend_program, uniform_alpha, alpha);
907
908         glViewport(0, 0, level_width, level_height);
909         if (split_ycbcr_output) {
910                 fbos_split.render_to(output_tex, output2_tex);
911         } else {
912                 fbos.render_to(output_tex);
913         }
914         glDisable(GL_BLEND);  // A bit ironic, perhaps.
915         glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
916 }
917
918 Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output)
919         : flow_level(op.finest_level),
920           split_ycbcr_output(split_ycbcr_output),
921           splat(op),
922           blend(split_ycbcr_output)
923 {
924         // Set up the vertex data that will be shared between all passes.
925         float vertices[] = {
926                 0.0f, 1.0f,
927                 0.0f, 0.0f,
928                 1.0f, 1.0f,
929                 1.0f, 0.0f,
930         };
931         glCreateBuffers(1, &vertex_vbo);
932         glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
933
934         glCreateVertexArrays(1, &vao);
935         glBindVertexArray(vao);
936         glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
937
938         GLint position_attrib = 0;  // Hard-coded in every vertex shader.
939         glEnableVertexArrayAttrib(vao, position_attrib);
940         glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
941 }
942
943 pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
944 {
945         GPUTimers timers;
946
947         ScopedTimer total_timer("Interpolate", &timers);
948
949         glBindVertexArray(vao);
950         glDisable(GL_DITHER);
951
952         // Pick out the right level to test splatting results on.
953         GLuint tex_view;
954         glGenTextures(1, &tex_view);
955         glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
956
957         int flow_width = width >> flow_level;
958         int flow_height = height >> flow_level;
959
960         GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
961         GLuint depth_rb = pool.get_renderbuffer(GL_DEPTH_COMPONENT16, flow_width, flow_height);  // Used for ranking flows.
962
963         {
964                 ScopedTimer timer("Splat", &total_timer);
965                 splat.exec(tex_view, bidirectional_flow_tex, flow_tex, depth_rb, flow_width, flow_height, alpha);
966         }
967         glDeleteTextures(1, &tex_view);
968
969         GLuint temp_tex[3];
970         temp_tex[0] = pool.get_texture(GL_RG16F, flow_width, flow_height);
971         temp_tex[1] = pool.get_texture(GL_RG16F, flow_width, flow_height);
972         temp_tex[2] = pool.get_texture(GL_RG16F, flow_width, flow_height);
973
974         {
975                 ScopedTimer timer("Fill holes", &total_timer);
976                 hole_fill.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
977                 hole_blend.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
978         }
979
980         pool.release_texture(temp_tex[0]);
981         pool.release_texture(temp_tex[1]);
982         pool.release_texture(temp_tex[2]);
983         pool.release_renderbuffer(depth_rb);
984
985         GLuint output_tex, output2_tex = 0;
986         if (split_ycbcr_output) {
987                 output_tex = pool.get_texture(GL_R8, width, height);
988                 output2_tex = pool.get_texture(GL_RG8, width, height);
989                 {
990                         ScopedTimer timer("Blend", &total_timer);
991                         blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
992                 }
993         } else {
994                 output_tex = pool.get_texture(GL_RGBA8, width, height);
995                 {
996                         ScopedTimer timer("Blend", &total_timer);
997                         blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha);
998                 }
999         }
1000         pool.release_texture(flow_tex);
1001         total_timer.end();
1002         if (!in_warmup) {
1003                 timers.print();
1004         }
1005
1006         return make_pair(output_tex, output2_tex);
1007 }
1008
1009 GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
1010 {
1011         {
1012                 lock_guard<mutex> lock(mu);
1013                 for (Texture &tex : textures) {
1014                         if (!tex.in_use && !tex.is_renderbuffer && tex.format == format &&
1015                             tex.width == width && tex.height == height && tex.num_layers == num_layers) {
1016                                 tex.in_use = true;
1017                                 return tex.tex_num;
1018                         }
1019                 }
1020         }
1021
1022         Texture tex;
1023         if (num_layers == 0) {
1024                 glCreateTextures(GL_TEXTURE_2D, 1, &tex.tex_num);
1025                 glTextureStorage2D(tex.tex_num, 1, format, width, height);
1026         } else {
1027                 glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex.tex_num);
1028                 glTextureStorage3D(tex.tex_num, 1, format, width, height, num_layers);
1029         }
1030         tex.format = format;
1031         tex.width = width;
1032         tex.height = height;
1033         tex.num_layers = num_layers;
1034         tex.in_use = true;
1035         tex.is_renderbuffer = false;
1036         {
1037                 lock_guard<mutex> lock(mu);
1038                 textures.push_back(tex);
1039         }
1040         return tex.tex_num;
1041 }
1042
1043 GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height)
1044 {
1045         {
1046                 lock_guard<mutex> lock(mu);
1047                 for (Texture &tex : textures) {
1048                         if (!tex.in_use && tex.is_renderbuffer && tex.format == format &&
1049                             tex.width == width && tex.height == height) {
1050                                 tex.in_use = true;
1051                                 return tex.tex_num;
1052                         }
1053                 }
1054         }
1055
1056         Texture tex;
1057         glCreateRenderbuffers(1, &tex.tex_num);
1058         glNamedRenderbufferStorage(tex.tex_num, format, width, height);
1059
1060         tex.format = format;
1061         tex.width = width;
1062         tex.height = height;
1063         tex.in_use = true;
1064         tex.is_renderbuffer = true;
1065         {
1066                 lock_guard<mutex> lock(mu);
1067                 textures.push_back(tex);
1068         }
1069         return tex.tex_num;
1070 }
1071
1072 void TexturePool::release_texture(GLuint tex_num)
1073 {
1074         lock_guard<mutex> lock(mu);
1075         for (Texture &tex : textures) {
1076                 if (!tex.is_renderbuffer && tex.tex_num == tex_num) {
1077                         assert(tex.in_use);
1078                         tex.in_use = false;
1079                         return;
1080                 }
1081         }
1082         assert(false);
1083 }
1084
1085 void TexturePool::release_renderbuffer(GLuint tex_num)
1086 {
1087         lock_guard<mutex> lock(mu);
1088         for (Texture &tex : textures) {
1089                 if (tex.is_renderbuffer && tex.tex_num == tex_num) {
1090                         assert(tex.in_use);
1091                         tex.in_use = false;
1092                         return;
1093                 }
1094         }
1095         //assert(false);
1096 }