5 #include "embedded_files.h"
6 #include "gpu_timers.h"
// Turns a byte offset into the pointer-typed "offset" argument that GL entry points
// such as glVertexAttribPointer() expect when sourcing from a bound buffer object.
// The offset is converted directly instead of being added to a null pointer, because
// pointer arithmetic on nullptr with a nonzero offset is undefined behavior in C++.
// The result type is still char *, as before.
#define BUFFER_OFFSET(i) (reinterpret_cast<char *>(static_cast<size_t>(i)))
26 // Weighting constants for the different parts of the variational refinement.
27 // These don't correspond 1:1 to the values given in the DIS paper,
28 // since we have different normalizations and ranges in some cases.
29 // These are found through a simple grid search on some MPI-Sintel data,
30 // although the error (EPE) seems to be fairly insensitive to the precise values.
31 // Only the relative values matter, so we fix alpha (the smoothness constant)
32 // at unity and tweak the others.
// In this file, vr_alpha is uploaded as the "alpha" uniform by ComputeDiffusivity::exec(),
// and vr_delta / vr_gamma as the "delta" / "gamma" uniforms by SetupEquations::exec().
34 // TODO: Maybe this should not be global.
35 float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
37 // Some global OpenGL objects.
// The three sampler objects are created and configured in the DISComputeFlow
// constructor, and handed to bind_sampler() by the individual passes.
38 // TODO: These should really be part of DISComputeFlow.
39 GLuint nearest_sampler, linear_sampler, zero_border_sampler;
// Computes a level count (int) from the image dimensions. The visible loop header
// keeps iterating for as long as either dimension is still larger than one pixel.
// NOTE(review): the loop body and the return statement are not visible in this
// excerpt; presumably w and h are halved and a counter is bumped per iteration -- confirm.
42 int find_num_levels(int width, int height)
45 for (int w = width, h = height; w > 1 || h > 1; ) {
// Reads the entire contents of `filename` into a string.
//
// The file on disk is preferred. If it cannot be opened and an embedded copy was
// supplied (start != nullptr), the `size` bytes starting at `start` are returned
// instead. Any other failure prints a diagnostic to stderr and terminates the
// process with exit(1), so callers never see a partially read file.
//
// Parameters:
//   filename  path of the file to read.
//   start     optional pointer to a compiled-in copy of the file, or nullptr.
//   size      number of bytes available at `start`.
std::string read_file(const std::string &filename, const unsigned char *start = nullptr, const size_t size = 0)
{
	// Binary mode, so that the size reported by ftell() is exactly the number of
	// bytes fread() delivers even on platforms that translate line endings.
	FILE *fp = fopen(filename.c_str(), "rb");
	if (fp == nullptr) {
		// Fall back to the version we compiled in. (We prefer disk if we can,
		// since that makes it possible to work on shaders without recompiling.)
		if (start != nullptr) {
			return std::string(reinterpret_cast<const char *>(start),
			                   reinterpret_cast<const char *>(start) + size);
		}

		perror(filename.c_str());
		exit(1);
	}

	if (fseek(fp, 0, SEEK_END) != 0) {
		perror("fseek(SEEK_END)");
		exit(1);
	}

	// ftell() returns long, and -1 on failure; resizing the string to an
	// unchecked -1 would request an absurd allocation.
	const long disk_size = ftell(fp);
	if (disk_size == -1) {
		perror("ftell");
		exit(1);
	}

	if (fseek(fp, 0, SEEK_SET) != 0) {
		perror("fseek(SEEK_SET)");
		exit(1);
	}

	std::string str;
	str.resize(disk_size);

	// fread() returns the number of complete items read (it never returns -1),
	// so an empty file must not be mistaken for a short read.
	if (disk_size > 0 && fread(&str[0], disk_size, 1, fp) != 1) {
		if (ferror(fp)) {
			perror("fread");
		} else {
			fprintf(stderr, "Short read when trying to read %ld bytes from %s\n",
			        disk_size, filename.c_str());
		}
		exit(1);
	}
	fclose(fp);

	return str;
}
// Creates a GL shader object of the given `type` and compiles `shader_src` into it.
// Whatever ends up in the shader info log is echoed to stderr. If the compile status
// is GL_FALSE, the source is additionally dumped to stderr with line-number
// annotations so that driver error messages can be matched to source lines.
// NOTE(review): the return statement and whatever follows the failure dump are not
// visible in this excerpt; presumably `obj` is returned on success -- confirm.
100 GLuint compile_shader(const string &shader_src, GLenum type)
102 GLuint obj = glCreateShader(type);
// An explicit length is passed, so GL does not depend on NUL termination of the source.
103 const GLchar *source[] = { shader_src.data() };
104 const GLint length[] = { (GLint)shader_src.size() };
105 glShaderSource(obj, 1, source, length);
106 glCompileShader(obj);
// Fetch the info log, keeping one byte in reserve for the terminator written below.
108 GLchar info_log[4096];
109 GLsizei log_length = sizeof(info_log) - 1;
110 glGetShaderInfoLog(obj, log_length, &log_length, info_log);
111 info_log[log_length] = 0;
112 if (strlen(info_log) > 0) {
113 fprintf(stderr, "Shader compile log: %s\n", info_log);
117 glGetShaderiv(obj, GL_COMPILE_STATUS, &status);
118 if (status == GL_FALSE) {
119 // Add some line numbers to more easily identify compile errors.
120 string src_with_lines = "/* 1 */ ";
122 for (char ch : shader_src) {
123 src_with_lines.push_back(ch);
// NOTE(review): the condition guarding the next two lines is not visible here;
// presumably a fresh "/* N */" prefix is appended after each newline -- confirm.
126 snprintf(buf, sizeof(buf), "/* %3zu */ ", ++lineno);
127 src_with_lines += buf;
131 fprintf(stderr, "Failed to compile shader:\n%s\n", src_with_lines.c_str());
// Creates a new GL program, attaches the given vertex and fragment shader objects
// and links it. If the link status is GL_FALSE, the program info log is printed to stderr.
// NOTE(review): what happens after the error message, and the return statement, are
// not visible in this excerpt; presumably `program` is returned on success -- confirm.
138 GLuint link_program(GLuint vs_obj, GLuint fs_obj)
140 GLuint program = glCreateProgram();
141 glAttachShader(program, vs_obj);
142 glAttachShader(program, fs_obj);
143 glLinkProgram(program);
145 glGetProgramiv(program, GL_LINK_STATUS, &success);
146 if (success == GL_FALSE) {
// Zero-initialized, so this is a valid (empty) C string even if GL writes nothing.
147 GLchar error_log[1024] = {0};
148 glGetProgramInfoLog(program, 1024, nullptr, error_log);
149 fprintf(stderr, "Error linking program: %s\n", error_log);
// Binds texture `tex` and sampler object `sampler` to texture unit `texture_unit`,
// and sets the sampler uniform at `location` in `program` to that unit.
155 void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler)
// -1 is what glGetUniformLocation() reports for a name that is not an active uniform.
// NOTE(review): the body of this branch is not visible; presumably it returns early -- confirm.
157 if (location == -1) {
161 glBindTextureUnit(texture_unit, tex);
162 glBindSampler(texture_unit, sampler);
163 glProgramUniform1i(program, location, texture_unit);
// Binds (to GL_FRAMEBUFFER) a framebuffer object whose color attachments are exactly
// `textures`: textures[i] is attached at mip level 0 to GL_COLOR_ATTACHMENT0 + i,
// and all attachments are enabled as draw buffers.
// FBOs are looked up in, and newly created ones stored into, the `fbos` map keyed on
// the texture array, so a given combination of textures reuses its FBO.
166 template<size_t num_elements>
167 void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements> &textures)
169 auto it = fbos.find(textures);
170 if (it != fbos.end()) {
// Already have an FBO for this set of textures.
// NOTE(review): presumably followed by an early return (not visible here) -- confirm.
171 glBindFramebuffer(GL_FRAMEBUFFER, it->second);
// No cached FBO: build a new one.
176 glCreateFramebuffers(1, &fbo);
177 GLenum bufs[num_elements];
178 for (size_t i = 0; i < num_elements; ++i) {
179 glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
180 bufs[i] = GL_COLOR_ATTACHMENT0 + i;
182 glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
// Remember it for the next call with the same textures, then bind it.
184 fbos[textures] = fbo;
185 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
// Like PersistentFBOSet::render_to(), but the framebuffer also gets renderbuffer
// `depth_rb` as its depth attachment. The lookup key is the pair (depth_rb, textures).
188 template<size_t num_elements>
189 void PersistentFBOSetWithDepth<num_elements>::render_to(GLuint depth_rb, const array<GLuint, num_elements> &textures)
191 auto key = make_pair(depth_rb, textures);
193 auto it = fbos.find(key);
194 if (it != fbos.end()) {
// Already have an FBO for this combination.
// NOTE(review): presumably followed by an early return (not visible here) -- confirm.
195 glBindFramebuffer(GL_FRAMEBUFFER, it->second);
// No cached FBO: build a new one with the depth renderbuffer plus one color
// attachment (mip level 0) per texture.
200 glCreateFramebuffers(1, &fbo);
201 GLenum bufs[num_elements];
202 glNamedFramebufferRenderbuffer(fbo, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rb);
203 for (size_t i = 0; i < num_elements; ++i) {
204 glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
205 bufs[i] = GL_COLOR_ATTACHMENT0 + i;
207 glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
// NOTE(review): the statement storing the new FBO into `fbos` is not visible in this
// excerpt; verify it exists, otherwise a new FBO would be created on every call.
210 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
// Builds the grayscale-conversion program from vs.vert + gray.frag, creates the VAO
// used by exec(), and looks up the "tex" sampler uniform.
// Shader sources go through read_file(), which prefers the file on disk and falls
// back to the compiled-in copy passed as the second and third arguments.
213 GrayscaleConversion::GrayscaleConversion()
215 gray_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
216 gray_fs_obj = compile_shader(read_file("gray.frag", _binary_gray_frag_data, _binary_gray_frag_size), GL_FRAGMENT_SHADER);
217 gray_program = link_program(gray_vs_obj, gray_fs_obj);
219 // Set up the VAO containing all the required position/texcoord data.
220 glCreateVertexArrays(1, &gray_vao);
221 glBindVertexArray(gray_vao);
// NOTE(review): no GL_ARRAY_BUFFER bind is visible in this block, so the
// glVertexAttribPointer() call below captures whatever buffer happens to be bound
// when this constructor runs -- confirm against the call site.
223 GLint position_attrib = glGetAttribLocation(gray_program, "position");
224 glEnableVertexArrayAttrib(gray_vao, position_attrib);
225 glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
227 uniform_tex = glGetUniformLocation(gray_program, "tex");
// Runs the grayscale program: samples `tex` (nearest filtering, texture unit 0) and
// renders into `gray_tex` with a width x height viewport, drawing a four-vertex
// triangle strip with num_layers instances.
// NOTE(review): tex and gray_tex are declared GLint here, whereas the other passes
// in this file take texture names as GLuint.
230 void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, int num_layers)
232 glUseProgram(gray_program);
233 bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);
235 glViewport(0, 0, width, height);
236 fbos.render_to(gray_tex);
// This pass uses its own VAO (set up in the constructor).
237 glBindVertexArray(gray_vao);
239 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// NOTE(review): the enclosing signature line is not visible in this excerpt; judging
// by the sobel_* members and Sobel::exec(), this is presumably the Sobel constructor body.
// Builds the Sobel program from vs.vert + sobel.frag (disk first, embedded copy as
// fallback) and looks up its "tex" sampler uniform.
244 sobel_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
245 sobel_fs_obj = compile_shader(read_file("sobel.frag", _binary_sobel_frag_data, _binary_sobel_frag_size), GL_FRAGMENT_SHADER);
246 sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
248 uniform_tex = glGetUniformLocation(sobel_program, "tex");
// Runs the Sobel program: samples `tex_view` (nearest filtering, texture unit 0) and
// renders into `grad_tex` with a level_width x level_height viewport, drawing a
// four-vertex triangle strip with num_layers instances.
// NOTE(review): tex_view and grad_tex are declared GLint here, whereas the caller in
// DISComputeFlow::exec() passes GLuint texture names.
251 void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers)
253 glUseProgram(sobel_program);
254 bind_sampler(sobel_program, uniform_tex, 0, tex_view, nearest_sampler);
256 glViewport(0, 0, level_width, level_height);
257 fbos.render_to(grad_tex);
259 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the motion search program from motion_search.vert + motion_search.frag
// (disk first, embedded copy as fallback) and caches the locations of all uniforms
// that exec() sets.
// NOTE(review): the member initializer list (presumably storing `op`) is not visible here.
262 MotionSearch::MotionSearch(const OperatingPoint &op)
265 motion_vs_obj = compile_shader(read_file("motion_search.vert", _binary_motion_search_vert_data, _binary_motion_search_vert_size), GL_VERTEX_SHADER);
266 motion_fs_obj = compile_shader(read_file("motion_search.frag", _binary_motion_search_frag_data, _binary_motion_search_frag_size), GL_FRAGMENT_SHADER);
267 motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
269 uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
270 uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
271 uniform_out_flow_size = glGetUniformLocation(motion_search_program, "out_flow_size");
272 uniform_image_tex = glGetUniformLocation(motion_search_program, "image_tex");
273 uniform_grad_tex = glGetUniformLocation(motion_search_program, "grad_tex");
274 uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex");
275 uniform_patch_size = glGetUniformLocation(motion_search_program, "patch_size");
276 uniform_num_iterations = glGetUniformLocation(motion_search_program, "num_iterations");
// Runs the motion search program and writes its result to `flow_out_tex`.
// Inputs: the image (`tex_view`, linear filtering), its gradients (`grad_tex`,
// nearest) and the flow from the previous level (`flow_tex`, linear).
// The viewport is width_patches x height_patches, i.e. one output pixel per patch;
// a four-vertex triangle strip is drawn with num_layers instances.
279 void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers)
281 glUseProgram(motion_search_program);
283 bind_sampler(motion_search_program, uniform_image_tex, 0, tex_view, linear_sampler);
284 bind_sampler(motion_search_program, uniform_grad_tex, 1, grad_tex, nearest_sampler);
285 bind_sampler(motion_search_program, uniform_flow_tex, 2, flow_tex, linear_sampler);
// Reciprocal sizes of this level and of the previous level, plus the patch grid size.
287 glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
288 glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
289 glProgramUniform2f(motion_search_program, uniform_out_flow_size, width_patches, height_patches);
// NOTE(review): these two are uploaded with the unsigned-int setter, so the shader is
// presumably expected to declare them as uint -- confirm against motion_search.frag/.vert.
290 glProgramUniform1ui(motion_search_program, uniform_patch_size, op.patch_size_pixels);
291 glProgramUniform1ui(motion_search_program, uniform_num_iterations, op.search_iterations);
293 glViewport(0, 0, width_patches, height_patches);
294 fbos.render_to(flow_out_tex);
295 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the densification program from densify.vert + densify.frag (disk first,
// embedded copy as fallback) and caches the uniform locations that exec() sets.
// NOTE(review): the member initializer list (presumably storing `op`) is not visible here.
298 Densify::Densify(const OperatingPoint &op)
301 densify_vs_obj = compile_shader(read_file("densify.vert", _binary_densify_vert_data, _binary_densify_vert_size), GL_VERTEX_SHADER);
302 densify_fs_obj = compile_shader(read_file("densify.frag", _binary_densify_frag_data, _binary_densify_frag_size), GL_FRAGMENT_SHADER);
303 densify_program = link_program(densify_vs_obj, densify_fs_obj);
305 uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
306 uniform_image_tex = glGetUniformLocation(densify_program, "image_tex");
307 uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
// Runs the densification program: reads the image (`tex_view`, linear) and the
// per-patch flow (`flow_tex`, nearest) and renders into `dense_flow_tex`, which is
// cleared to zero first. One instance is drawn per patch and layer
// (width_patches * height_patches * num_layers in total).
310 void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers)
312 glUseProgram(densify_program);
314 bind_sampler(densify_program, uniform_image_tex, 0, tex_view, linear_sampler);
315 bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
// The patch size is passed as a fraction of the level size in each dimension.
317 glProgramUniform2f(densify_program, uniform_patch_size,
318 float(op.patch_size_pixels) / level_width,
319 float(op.patch_size_pixels) / level_height);
321 glViewport(0, 0, level_width, level_height);
// Blend function for pure addition (source + destination).
// NOTE(review): the calls that enable/disable GL_BLEND are not visible in this excerpt.
323 glBlendFunc(GL_ONE, GL_ONE);
324 fbos.render_to(dense_flow_tex);
325 glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
326 glClear(GL_COLOR_BUFFER_BIT);
327 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches * num_layers);
// NOTE(review): the enclosing signature line is not visible in this excerpt; judging
// by the prewarp_* members and Prewarp::exec(), this is presumably the Prewarp constructor body.
// Builds the prewarp program from vs.vert + prewarp.frag (disk first, embedded copy
// as fallback) and caches its two sampler uniform locations.
332 prewarp_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
333 prewarp_fs_obj = compile_shader(read_file("prewarp.frag", _binary_prewarp_frag_data, _binary_prewarp_frag_size), GL_FRAGMENT_SHADER);
334 prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj);
336 uniform_image_tex = glGetUniformLocation(prewarp_program, "image_tex");
337 uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
// Runs the prewarp program: reads the image (`tex_view`, linear) and the flow
// (`flow_tex`, nearest), and renders into three targets at once, in this order:
// `I_tex`, `I_t_tex` and `normalized_flow_tex`. The viewport is
// level_width x level_height; num_layers instances are drawn.
340 void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height, int num_layers)
342 glUseProgram(prewarp_program);
344 bind_sampler(prewarp_program, uniform_image_tex, 0, tex_view, linear_sampler);
345 bind_sampler(prewarp_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
347 glViewport(0, 0, level_width, level_height);
349 fbos.render_to(I_tex, I_t_tex, normalized_flow_tex);
350 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the derivatives program from vs.vert + derivatives.frag (disk first,
// embedded copy as fallback) and looks up its "tex" sampler uniform.
353 Derivatives::Derivatives()
355 derivatives_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
356 derivatives_fs_obj = compile_shader(read_file("derivatives.frag", _binary_derivatives_frag_data, _binary_derivatives_frag_size), GL_FRAGMENT_SHADER);
357 derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj);
359 uniform_tex = glGetUniformLocation(derivatives_program, "tex");
// Runs the derivatives program: samples `input_tex` (nearest) and renders into two
// targets, `I_x_y_tex` and `beta_0_tex`, with a level_width x level_height viewport
// and num_layers instances.
362 void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers)
364 glUseProgram(derivatives_program);
366 bind_sampler(derivatives_program, uniform_tex, 0, input_tex, nearest_sampler);
368 glViewport(0, 0, level_width, level_height);
370 fbos.render_to(I_x_y_tex, beta_0_tex);
371 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the diffusivity program from vs.vert + diffusivity.frag (disk first,
// embedded copy as fallback) and caches the uniform locations that exec() sets.
374 ComputeDiffusivity::ComputeDiffusivity()
376 diffusivity_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
377 diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag", _binary_diffusivity_frag_data, _binary_diffusivity_frag_size), GL_FRAGMENT_SHADER);
378 diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj);
380 uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex");
381 uniform_diff_flow_tex = glGetUniformLocation(diffusivity_program, "diff_flow_tex");
382 uniform_alpha = glGetUniformLocation(diffusivity_program, "alpha");
383 uniform_zero_diff_flow = glGetUniformLocation(diffusivity_program, "zero_diff_flow");
// Runs the diffusivity program: reads `flow_tex` and `diff_flow_tex` (both nearest)
// and renders into `diffusivity_tex` with a level_width x level_height viewport and
// num_layers instances.
// `zero_diff_flow` is forwarded to the shader as an integer uniform; the caller in
// DISComputeFlow::exec() sets it on the first outer iteration, when diff_flow_tex
// has not been written yet.
386 void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
388 glUseProgram(diffusivity_program);
390 bind_sampler(diffusivity_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
391 bind_sampler(diffusivity_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler);
// The smoothness weight comes from the file-global vr_alpha.
392 glProgramUniform1f(diffusivity_program, uniform_alpha, vr_alpha);
393 glProgramUniform1i(diffusivity_program, uniform_zero_diff_flow, zero_diff_flow);
395 glViewport(0, 0, level_width, level_height);
398 fbos.render_to(diffusivity_tex);
399 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the equation-setup program from equations.vert + equations.frag (disk
// first, embedded copy as fallback) and caches the uniform locations that exec() sets.
402 SetupEquations::SetupEquations()
404 equations_vs_obj = compile_shader(read_file("equations.vert", _binary_equations_vert_data, _binary_equations_vert_size), GL_VERTEX_SHADER);
405 equations_fs_obj = compile_shader(read_file("equations.frag", _binary_equations_frag_data, _binary_equations_frag_size), GL_FRAGMENT_SHADER);
406 equations_program = link_program(equations_vs_obj, equations_fs_obj);
408 uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex");
409 uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex");
410 uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex");
411 uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex");
412 uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex");
413 uniform_diffusivity_tex = glGetUniformLocation(equations_program, "diffusivity_tex");
414 uniform_gamma = glGetUniformLocation(equations_program, "gamma");
415 uniform_delta = glGetUniformLocation(equations_program, "delta");
416 uniform_zero_diff_flow = glGetUniformLocation(equations_program, "zero_diff_flow");
// Runs the equation-setup program: reads six input textures and renders into the two
// targets `equation_red_tex` and `equation_black_tex`, drawing num_layers instances.
// All inputs use nearest filtering except `diffusivity_tex`, which is sampled with
// the zero-border sampler.
419 void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
421 glUseProgram(equations_program);
423 bind_sampler(equations_program, uniform_I_x_y_tex, 0, I_x_y_tex, nearest_sampler);
424 bind_sampler(equations_program, uniform_I_t_tex, 1, I_t_tex, nearest_sampler);
425 bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
426 bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
427 bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
428 bind_sampler(equations_program, uniform_diffusivity_tex, 5, diffusivity_tex, zero_border_sampler);
// Weights come from the file-global vr_delta / vr_gamma.
429 glProgramUniform1f(equations_program, uniform_delta, vr_delta);
430 glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
431 glProgramUniform1i(equations_program, uniform_zero_diff_flow, zero_diff_flow);
// The outputs are only half as wide (rounded up) as the level; this matches how the
// red/black equation textures are allocated in DISComputeFlow::exec().
433 glViewport(0, 0, (level_width + 1) / 2, level_height);
435 fbos.render_to(equation_red_tex, equation_black_tex);
436 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// NOTE(review): the enclosing signature line is not visible in this excerpt; judging
// by the sor_* members and SOR::exec(), this is presumably the SOR constructor body.
// Builds the SOR program from sor.vert + sor.frag (disk first, embedded copy as
// fallback) and caches the uniform locations that exec() sets.
441 sor_vs_obj = compile_shader(read_file("sor.vert", _binary_sor_vert_data, _binary_sor_vert_size), GL_VERTEX_SHADER);
442 sor_fs_obj = compile_shader(read_file("sor.frag", _binary_sor_frag_data, _binary_sor_frag_size), GL_FRAGMENT_SHADER);
443 sor_program = link_program(sor_vs_obj, sor_fs_obj);
445 uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
446 uniform_equation_red_tex = glGetUniformLocation(sor_program, "equation_red_tex");
447 uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex");
448 uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex");
449 uniform_phase = glGetUniformLocation(sor_program, "phase");
450 uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases");
// Runs `num_iterations` iterations of the SOR program on `diff_flow_tex`, which is
// both sampled and rendered to. Each iteration consists of a "red" pass (phase 0)
// followed by a "black" pass (phase 1), each drawn with num_layers instances and
// timed under `sor_timer`.
//
// The "num_nonzero_phases" uniform tells the shader how many of the two phases have
// been written so far. If `zero_diff_flow` is false it is 2 throughout; otherwise it
// is 0 for the first red pass, 1 for the first black pass, and 2 from then on.
453 void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer)
455 glUseProgram(sor_program);
457 bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
458 bind_sampler(sor_program, uniform_diffusivity_tex, 1, diffusivity_tex, zero_border_sampler);
459 bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler);
460 bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler);
// With existing diff flow, both phases count as valid from the start.
462 if (!zero_diff_flow) {
463 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
466 // NOTE: We bind to the texture we are rendering from, but we never write any value
467 // that we read in the same shader pass (we call discard for red values when we compute
468 // black, and vice versa), and we have barriers between the passes, so we're fine
470 glViewport(0, 0, level_width, level_height);
472 fbos.render_to(diff_flow_tex);
474 for (int i = 0; i < num_iterations; ++i) {
// Red pass (phase 0). On the very first pass of a zero-initialized solve, nothing
// has been written yet.
476 ScopedTimer timer("Red pass", sor_timer);
477 if (zero_diff_flow && i == 0) {
478 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0);
480 glProgramUniform1i(sor_program, uniform_phase, 0);
481 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Black pass (phase 1). On the first iteration of a zero-initialized solve, only
// the red values exist at this point.
485 ScopedTimer timer("Black pass", sor_timer);
486 if (zero_diff_flow && i == 0) {
487 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1);
489 glProgramUniform1i(sor_program, uniform_phase, 1);
490 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// From the second iteration on, both phases hold valid data.
491 if (zero_diff_flow && i == 0) {
492 glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
// NOTE(review): the body of this condition is not visible in this excerpt; given the
// comment about barriers above, presumably a barrier is issued between iterations
// (but not after the last one) -- confirm.
494 if (i != num_iterations - 1) {
// Builds the add-base-flow program from vs.vert + add_base_flow.frag (disk first,
// embedded copy as fallback) and looks up its "diff_flow_tex" sampler uniform.
501 AddBaseFlow::AddBaseFlow()
503 add_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
504 add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag", _binary_add_base_flow_frag_data, _binary_add_base_flow_frag_size), GL_FRAGMENT_SHADER);
505 add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
507 uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
// Runs the add-base-flow program: samples `diff_flow_tex` (nearest) and renders into
// `base_flow_tex` with a level_width x level_height viewport and num_layers
// instances. The blend function is set up for pure addition, so that the result can
// be accumulated onto what is already in base_flow_tex.
510 void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers)
512 glUseProgram(add_flow_program);
514 bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
516 glViewport(0, 0, level_width, level_height);
// NOTE(review): the calls that enable/disable GL_BLEND are not visible in this excerpt.
518 glBlendFunc(GL_ONE, GL_ONE);
519 fbos.render_to(base_flow_tex);
521 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Builds the flow-resize program from vs.vert + resize_flow.frag (disk first,
// embedded copy as fallback) and caches the uniform locations that exec() sets.
524 ResizeFlow::ResizeFlow()
526 resize_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
527 resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag", _binary_resize_flow_frag_data, _binary_resize_flow_frag_size), GL_FRAGMENT_SHADER);
528 resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
530 uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
531 uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
// Runs the flow-resize program: samples `flow_tex` (nearest) and renders into
// `out_tex` with an output_width x output_height viewport and num_layers instances.
534 void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers)
536 glUseProgram(resize_flow_program);
538 bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
// Per-axis ratio between output and input size, passed to the shader as "scale_factor".
540 glProgramUniform2f(resize_flow_program, uniform_scale_factor, float(output_width) / input_width, float(output_height) / input_height);
542 glViewport(0, 0, output_width, output_height);
544 fbos.render_to(out_tex);
546 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
// Sets up the GL state shared by all passes: the three sampler objects, the 1x1
// initial flow texture, and the vertex buffer + VAO that exec() binds before drawing.
// NOTE(review): the samplers are file-level globals, so constructing a second
// DISComputeFlow overwrites the names stored by the first one.
549 DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
550 : width(width), height(height), op(op), motion_search(op), densify(op)
552 // Make some samplers.
// Nearest-neighbor filtering, clamped to the edge.
553 glCreateSamplers(1, &nearest_sampler);
554 glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
555 glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
556 glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
557 glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Linear filtering, clamped to the edge.
559 glCreateSamplers(1, &linear_sampler);
560 glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
561 glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
562 glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
563 glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
565 // The smoothness is sampled so that once we get to a smoothness involving
566 // a value outside the border, the diffusivity between the two becomes zero.
567 // Similarly, gradients are zero outside the border, since the edge is taken
// Linear filtering, clamped to an all-zero border color.
569 glCreateSamplers(1, &zero_border_sampler);
570 glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
571 glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
572 glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
573 glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
574 float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f }; // Note that zero alpha means we can also see whether we sampled outside the border or not.
575 glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
577 // Initial flow is zero, 1x1.
// A null data pointer makes glClearTexImage() fill the texture with zeros.
578 glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &initial_flow_tex);
579 glTextureStorage3D(initial_flow_tex, 1, GL_RG16F, 1, 1, 1);
580 glClearTexImage(initial_flow_tex, 0, GL_RG, GL_FLOAT, nullptr);
582 // Set up the vertex data that will be shared between all passes.
// NOTE(review): the definition of `vertices` is not visible in this excerpt.
589 glCreateBuffers(1, &vertex_vbo);
590 glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
592 glCreateVertexArrays(1, &vao);
593 glBindVertexArray(vao);
594 glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
// Attribute 0: two floats per vertex, tightly packed, from the start of vertex_vbo.
596 GLint position_attrib = 0; // Hard-coded in every vertex shader.
597 glEnableVertexArrayAttrib(vao, position_attrib);
598 glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
// Computes flow for the image pyramid in `tex` and returns the name of a texture
// holding the result.
//
// Works coarse-to-fine, from op.coarsest_level down to op.finest_level. Each level
// runs Sobel -> motion search -> densification -> prewarp, and, if
// op.variational_refinement is set, a number of refinement iterations
// (diffusivity -> equation setup -> SOR) whose result is added onto the base flow.
// The flow from one level seeds the motion search on the next finer one.
//
// Parameters:
//   tex              source texture; level N is read through a view of mip level N.
//   flow_direction   FORWARD_AND_BACKWARD processes two layers, anything else one.
//   resize_strategy  DO_NOT_RESIZE_FLOW returns the flow at the size of the finest
//                    computed level instead of scaling it up to width x height.
//
// Intermediate textures are borrowed from `pool` and released as soon as the last
// pass reading them has been issued.
// NOTE(review): the returned texture also comes from `pool`; presumably the caller is
// responsible for releasing it -- confirm.
601 GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy)
603 int num_layers = (flow_direction == FORWARD_AND_BACKWARD) ? 2 : 1;
// The coarsest level is seeded from the 1x1 all-zero initial flow.
604 int prev_level_width = 1, prev_level_height = 1;
605 GLuint prev_level_flow_tex = initial_flow_tex;
609 glBindVertexArray(vao);
610 glDisable(GL_DITHER);
612 ScopedTimer total_timer("Compute flow", &timers);
613 for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
614 char timer_name[256];
615 snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
616 ScopedTimer level_timer(timer_name, &total_timer);
// Each level halves the resolution of the previous (finer) one.
618 int level_width = width >> level;
619 int level_height = height >> level;
620 float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
622 // Make sure we have patches at least every Nth pixel, e.g. for width=9
623 // and patch_spacing=3 (the default), we put out patch centers in
624 // x=0, x=3, x=6, x=9, which is four patches. The fragment shader will
625 // lock all the centers to integer coordinates if needed.
626 int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
627 int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
629 // Make sure we always read from the correct level; the chosen
630 // mipmapping could otherwise be rather unpredictable, especially
631 // during motion search.
// NOTE(review): the view always spans two layers (the last argument), independent of
// num_layers -- confirm that `tex` always has at least two layers.
633 glGenTextures(1, &tex_view);
634 glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, tex, GL_R8, level, 1, 0, 2);
636 // Create a new texture to hold the gradients.
637 GLuint grad_tex = pool.get_texture(GL_R32UI, level_width, level_height, num_layers);
639 // Find the derivative.
641 ScopedTimer timer("Sobel", &level_timer);
642 sobel.exec(tex_view, grad_tex, level_width, level_height, num_layers);
645 // Motion search to find the initial flow. We use the flow from the previous
646 // level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
648 // Create an output flow texture.
649 GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches, num_layers);
653 ScopedTimer timer("Motion search", &level_timer);
654 motion_search.exec(tex_view, grad_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches, num_layers);
// The gradients are only needed by the motion search.
656 pool.release_texture(grad_tex);
660 // Set up an output texture (cleared in Densify).
661 GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height, num_layers);
665 ScopedTimer timer("Densification", &level_timer);
666 densify.exec(tex_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches, num_layers);
668 pool.release_texture(flow_out_tex);
670 // Everything below here in the loop belongs to variational refinement.
671 ScopedTimer varref_timer("Variational refinement", &level_timer);
673 // Prewarping; create I and I_t, and a normalized base flow (so we don't
674 // have to normalize it over and over again, and also save some bandwidth).
676 // During the entire rest of the variational refinement, flow will be measured
677 // in pixels, not 0..1 normalized OpenGL texture coordinates.
678 // This is because variational refinement depends so heavily on derivatives,
679 // which are measured in intensity levels per pixel.
680 GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
681 GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
682 GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
684 ScopedTimer timer("Prewarping", &varref_timer);
685 prewarp.exec(tex_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height, num_layers);
// Prewarp is the last pass that reads the dense flow and the per-level texture view.
687 pool.release_texture(dense_flow_tex);
688 glDeleteTextures(1, &tex_view);
690 // TODO: If we don't have variational refinement, we don't need I and I_t,
691 // so computing them is a waste.
692 if (op.variational_refinement) {
693 // Calculate I_x and I_y. We're only calculating first derivatives;
694 // the others will be taken on-the-fly in order to sample from fewer
695 // textures overall, since sampling from the L1 cache is cheap.
696 // (TODO: Verify that this is indeed faster than making separate
697 // double-derivative textures.)
698 GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
699 GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
701 ScopedTimer timer("First derivatives", &varref_timer);
702 derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height, num_layers);
704 pool.release_texture(I_tex);
706 // We need somewhere to store du and dv (the flow increment, relative
707 // to the non-refined base flow u0 and v0). It's initially garbage,
708 // but not read until we've written something sane to it.
709 GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
711 // And for diffusivity.
712 GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
714 // And finally for the equation set. See SetupEquations for
715 // the storage format.
716 GLuint equation_red_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
717 GLuint equation_black_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
// The number of outer iterations grows with the level index (level + 1 of them).
// On the first one (outer_idx == 0) the passes are told that diff_flow_tex does not
// hold valid data yet.
719 for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
720 // Calculate the diffusivity term for each pixel.
722 ScopedTimer timer("Compute diffusivity", &varref_timer);
723 compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0, num_layers);
726 // Set up the 2x2 equation system for each pixel.
728 ScopedTimer timer("Set up equations", &varref_timer);
729 setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0, num_layers);
732 // Run a few SOR iterations. Note that these are to/from the same texture.
// The inner iteration count is fixed at 5.
734 ScopedTimer timer("SOR", &varref_timer);
735 sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, num_layers, &timer);
739 pool.release_texture(I_t_tex);
740 pool.release_texture(I_x_y_tex);
741 pool.release_texture(beta_0_tex);
742 pool.release_texture(diffusivity_tex);
743 pool.release_texture(equation_red_tex);
744 pool.release_texture(equation_black_tex);
746 // Add the differential flow found by the variational refinement to the base flow,
747 // giving the final flow estimate for this level.
748 // The output is in base_flow_tex; we don't need to make a new texture.
750 ScopedTimer timer("Add differential flow", &varref_timer);
751 add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
753 pool.release_texture(diff_flow_tex);
// This level's flow becomes the seed for the next finer level. The initial 1x1
// flow texture is owned by this object rather than by the pool, so it is never released.
756 if (prev_level_flow_tex != initial_flow_tex) {
757 pool.release_texture(prev_level_flow_tex);
759 prev_level_flow_tex = base_flow_tex;
760 prev_level_width = level_width;
761 prev_level_height = level_height;
769 // Scale up the flow to the final size (if needed).
770 if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
771 return prev_level_flow_tex;
// Otherwise, scale the finest computed level up to width x height in a new texture.
// NOTE(review): the final return statement is not visible in this excerpt; presumably
// final_tex is returned -- confirm.
773 GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
774 resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height, num_layers);
775 pool.release_texture(prev_level_flow_tex);
// Builds the splat program from splat.vert + splat.frag (disk first, embedded copy
// as fallback) and caches the uniform locations that exec() sets.
// NOTE(review): the member initializer list (presumably storing `op`) is not visible here.
780 Splat::Splat(const OperatingPoint &op)
783 splat_vs_obj = compile_shader(read_file("splat.vert", _binary_splat_vert_data, _binary_splat_vert_size), GL_VERTEX_SHADER);
784 splat_fs_obj = compile_shader(read_file("splat.frag", _binary_splat_frag_data, _binary_splat_frag_size), GL_FRAGMENT_SHADER);
785 splat_program = link_program(splat_vs_obj, splat_fs_obj);
787 uniform_splat_size = glGetUniformLocation(splat_program, "splat_size");
788 uniform_alpha = glGetUniformLocation(splat_program, "alpha");
789 uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex");
790 uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex");
791 uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size");
// Runs the splat program: reads `gray_tex` (linear) and `bidirectional_flow_tex`
// (nearest), and renders into `flow_tex` with `depth_rb` as the depth buffer.
// Both buffers are cleared first (color to an "invalid flow" marker, depth to 1.0);
// then width * height * 2 instances are drawn with depth testing (GL_LESS) enabled.
// Depth testing is switched off again before returning.
794 void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
796 glUseProgram(splat_program);
798 bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler);
799 bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
// NOTE(review): if op.splat_size has an integral type, these would be integer
// divisions; its declaration is not visible here -- confirm that it is a float.
801 glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
802 glProgramUniform1f(splat_program, uniform_alpha, alpha);
803 glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
805 glViewport(0, 0, width, height);
807 glEnable(GL_DEPTH_TEST);
808 glDepthMask(GL_TRUE);
809 glDepthFunc(GL_LESS); // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.)
811 fbos.render_to(depth_rb, flow_tex);
813 // Evidently NVIDIA doesn't use fast clears for glClearTexImage, so clear now that
814 // we've got it bound.
815 glClearColor(1000.0f, 1000.0f, 0.0f, 1.0f); // Invalid flow.
816 glClearDepth(1.0f); // Effectively infinity.
817 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// The instance count is two per pixel of the width x height area.
819 glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height * 2);
821 glDisable(GL_DEPTH_TEST);
// Builds the hole-fill program from hole_fill.vert / hole_fill.frag (on-disk
// file preferred by read_file(), embedded _binary_hole_fill_* copy as the
// fallback) and caches the uniform locations used by HoleFill::exec().
// NOTE(review): presumably the body of the HoleFill constructor; its
// signature line is not among these lines -- confirm.
826 fill_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);
827 fill_fs_obj = compile_shader(read_file("hole_fill.frag", _binary_hole_fill_frag_data, _binary_hole_fill_frag_size), GL_FRAGMENT_SHADER);
828 fill_program = link_program(fill_vs_obj, fill_fs_obj);
830 uniform_tex = glGetUniformLocation(fill_program, "tex");
831 uniform_z = glGetUniformLocation(fill_program, "z");
832 uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset");
// Fills holes in flow_tex in four directional passes, drawing into flow_tex
// itself with depth_rb as the depth buffer.
//
// Each pass draws a full-viewport strip repeatedly with a sample offset of
// 1, 2, 4, ... pixels (expressed as a fraction of width or height). The "z"
// uniform is lowered by 1/1024 for every pass; together with GL_LESS this lets
// a later pass overwrite what an earlier pass filled (see the comments below).
// NOTE(review): "z" is presumably the depth the quad is drawn at -- confirm
// against hole_fill.vert.
//
// The results of the first three passes are copied to temp_tex[0..2]; the
// result of the fourth pass stays in flow_tex. HoleBlend::exec() binds these
// as left_tex, right_tex, up_tex and down_tex respectively.
//
//   flow_tex       - read and written in place.
//   depth_rb       - depth buffer; expected to still hold the values from the splat pass.
//   temp_tex       - three textures receiving the intermediate results.
//   width, height  - size of flow_tex in pixels.
835 void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
837 glUseProgram(fill_program);
839 bind_sampler(fill_program, uniform_tex, 0, flow_tex, nearest_sampler);
841 glProgramUniform1f(fill_program, uniform_z, 1.0f - 1.0f / 1024.0f);
843 glViewport(0, 0, width, height);
845 glEnable(GL_DEPTH_TEST);
846 glDepthFunc(GL_LESS); // Only update the values > 0.999f (i.e., only invalid pixels).
848 fbos.render_to(depth_rb, flow_tex); // NOTE: Reading and writing to the same texture.
850 // Fill holes from the left, by shifting 1, 2, 4, 8, etc. pixels to the right.
851 for (int offs = 1; offs < width; offs *= 2) {
852 glProgramUniform2f(fill_program, uniform_sample_offset, -offs / float(width), 0.0f);
853 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Keep a copy of this pass' result (later bound as left_tex).
856 glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[0], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
858 // Similarly, fill holes from the right; adjust Z a bit down, so that we re-fill
859 // the pixels that were overwritten by the previous pass.
860 glProgramUniform1f(fill_program, uniform_z, 1.0f - 2.0f / 1024.0f);
861 for (int offs = 1; offs < width; offs *= 2) {
862 glProgramUniform2f(fill_program, uniform_sample_offset, offs / float(width), 0.0f);
863 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Keep a copy of this pass' result (later bound as right_tex).
866 glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[1], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
// Same along the vertical axis, sampling at negative Y offsets; Z is lowered once more.
869 glProgramUniform1f(fill_program, uniform_z, 1.0f - 3.0f / 1024.0f);
870 for (int offs = 1; offs < height; offs *= 2) {
871 glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, -offs / float(height));
872 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Keep a copy of this pass' result (later bound as up_tex).
875 glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[2], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
// Last pass: positive Y offsets. Its result is left in flow_tex (later bound as down_tex).
878 glProgramUniform1f(fill_program, uniform_z, 1.0f - 4.0f / 1024.0f);
879 for (int offs = 1; offs < height; offs *= 2) {
880 glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, offs / float(height));
881 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
885 glDisable(GL_DEPTH_TEST);
// Builds the hole-blend program: the vertex shader is the same hole_fill.vert
// that the fill pass uses, paired with hole_blend.frag. read_file() prefers the
// on-disk files and falls back to the embedded _binary_* copies. Also caches
// the uniform locations for the four directional textures, "z" and
// "sample_offset", which HoleBlend::exec() sets.
888 HoleBlend::HoleBlend()
890 blend_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER); // Reuse the vertex shader from the fill.
891 blend_fs_obj = compile_shader(read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size), GL_FRAGMENT_SHADER);
892 blend_program = link_program(blend_vs_obj, blend_fs_obj);
894 uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
895 uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
896 uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
897 uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
898 uniform_z = glGetUniformLocation(blend_program, "z");
899 uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
// Combines the four directional hole-fill results into flow_tex with a single
// full-viewport draw.
//
//   flow_tex       - bound as "down_tex" and also the render target.
//   depth_rb       - depth buffer as left behind by HoleFill::exec().
//   temp_tex       - bound as "left_tex", "right_tex" and "up_tex" (indices 0, 1, 2).
//   width, height  - viewport size.
//
// Depth testing is enabled for the draw and disabled again before returning.
902 void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
904 glUseProgram(blend_program);
906 bind_sampler(blend_program, uniform_left_tex, 0, temp_tex[0], nearest_sampler);
907 bind_sampler(blend_program, uniform_right_tex, 1, temp_tex[1], nearest_sampler);
908 bind_sampler(blend_program, uniform_up_tex, 2, temp_tex[2], nearest_sampler);
909 bind_sampler(blend_program, uniform_down_tex, 3, flow_tex, nearest_sampler);
// Same Z as the final HoleFill pass and no sample offset; with GL_LEQUAL below,
// fragments pass only where the stored depth is at least this value.
// NOTE(review): assumes "z" is the depth the shared vertex shader outputs -- confirm.
911 glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
912 glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
914 glViewport(0, 0, width, height);
916 glEnable(GL_DEPTH_TEST);
917 glDepthFunc(GL_LEQUAL); // Skip over all of the pixels that were never holes to begin with.
919 fbos.render_to(depth_rb, flow_tex); // NOTE: Reading and writing to the same texture.
921 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
923 glDisable(GL_DEPTH_TEST);
// Builds the blend program from vs.vert and blend.frag (on-disk file preferred
// by read_file(), embedded copy as the fallback).
//
//   split_ycbcr_output - when true, "#define SPLIT_YCBCR_OUTPUT 1" is spliced
//                        into the fragment shader source before compilation,
//                        and Blend::exec() renders to two targets.
//
// NOTE(review): uniform_flow_consistency_tolerance is looked up here, but the
// visible Blend::exec() never sets it -- confirm whether it is still needed.
926 Blend::Blend(bool split_ycbcr_output)
927 : split_ycbcr_output(split_ycbcr_output)
929 string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size);
930 if (split_ycbcr_output) {
931 // Insert after the first line, which is expected to be the #version directive
// (GLSL requires #version to come before any other directive).
932 size_t offset = frag_shader.find('\n');
933 assert(offset != string::npos);
934 frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1);
937 blend_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
938 blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
939 blend_program = link_program(blend_vs_obj, blend_fs_obj);
941 uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
942 uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
943 uniform_alpha = glGetUniformLocation(blend_program, "alpha");
944 uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
// Runs the blend pass with a single full-viewport draw.
//
//   image_tex                  - sampled through linear_sampler as "image_tex".
//   flow_tex                   - sampled through linear_sampler as "flow_tex"; it may be
//                                smaller than the output, hence the linear filtering.
//   output_tex                 - render target.
//   output2_tex                - second render target; only used when the object was
//                                constructed with split_ycbcr_output.
//   level_width, level_height  - viewport size.
//   alpha                      - forwarded unchanged to the "alpha" uniform.
947 void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
949 glUseProgram(blend_program);
950 bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
951 bind_sampler(blend_program, uniform_flow_tex, 1, flow_tex, linear_sampler); // May be upsampled.
952 glProgramUniform1f(blend_program, uniform_alpha, alpha);
954 glViewport(0, 0, level_width, level_height);
// Two color targets in split mode, one otherwise.
955 if (split_ycbcr_output) {
956 fbos_split.render_to(output_tex, output2_tex);
958 fbos.render_to(output_tex);
// The blending happens in the shader, so fixed-function blending stays off.
960 glDisable(GL_BLEND); // A bit ironic, perhaps.
961 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Constructs the interpolation pipeline.
//
//   op                  - operating point; op.finest_level becomes flow_level, the
//                         level that Interpolate::exec() works at.
//   split_ycbcr_output  - stored, and forwarded to the Blend stage.
//
// Also creates the vertex buffer and vertex array object shared by all passes.
964 Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output)
965 : flow_level(op.finest_level),
966 split_ycbcr_output(split_ycbcr_output),
968 blend(split_ycbcr_output) {
969 // Set up the vertex data that will be shared between all passes.
976 glCreateBuffers(1, &vertex_vbo);
977 glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
// The VAO is left bound, with vertex_vbo as its GL_ARRAY_BUFFER, so that the
// glVertexAttribPointer() call below records that buffer for attribute 0.
979 glCreateVertexArrays(1, &vao);
980 glBindVertexArray(vao);
981 glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
983 GLint position_attrib = 0; // Hard-coded in every vertex shader.
984 glEnableVertexArrayAttrib(vao, position_attrib);
// Two floats per vertex; stride 0 means tightly packed, starting at offset 0.
985 glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
// Produces an interpolated frame by running splat, hole fill, hole blend and
// blend in sequence.
//
//   image_tex               - passed to the blend stage as the image to sample.
//   gray_tex                - texture array; mip level flow_level, layers 0-1, is
//                             viewed as GL_R8 and handed to the splat stage.
//   bidirectional_flow_tex  - flow input to the splat stage.
//   width, height           - output size; the flow stages run at this size
//                             shifted right by flow_level.
//   alpha                   - forwarded to the splat and blend stages.
//
// Returns (output_tex, output2_tex). Both come from the texture pool and are
// not released here, so releasing them is left to the caller. output2_tex is 0
// unless split_ycbcr_output is set. All intermediate textures and the depth
// renderbuffer are returned to the pool before this function returns.
988 pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
992 ScopedTimer total_timer("Interpolate", &timers);
994 glBindVertexArray(vao);
995 glDisable(GL_DITHER);
997 // Pick out the right level to test splatting results on.
999 glGenTextures(1, &tex_view);
// glTextureView(view, target, source, format, minlevel, numlevels, minlayer, numlayers).
1000 glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
1002 int flow_width = width >> flow_level;
1003 int flow_height = height >> flow_level;
1005 GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
1006 GLuint depth_rb = pool.get_renderbuffer(GL_DEPTH_COMPONENT16, flow_width, flow_height); // Used for ranking flows.
1009 ScopedTimer timer("Splat", &total_timer);
1010 splat.exec(tex_view, bidirectional_flow_tex, flow_tex, depth_rb, flow_width, flow_height, alpha);
// The view is only needed by the splat stage.
1012 glDeleteTextures(1, &tex_view);
// Scratch textures for the directional hole-fill results.
1015 temp_tex[0] = pool.get_texture(GL_RG16F, flow_width, flow_height);
1016 temp_tex[1] = pool.get_texture(GL_RG16F, flow_width, flow_height);
1017 temp_tex[2] = pool.get_texture(GL_RG16F, flow_width, flow_height);
1020 ScopedTimer timer("Fill holes", &total_timer);
1021 hole_fill.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
1022 hole_blend.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
1025 pool.release_texture(temp_tex[0]);
1026 pool.release_texture(temp_tex[1]);
1027 pool.release_texture(temp_tex[2]);
1028 pool.release_renderbuffer(depth_rb);
1030 GLuint output_tex, output2_tex = 0;
// Split mode: a GL_R8 and a GL_RG8 output (presumably luma and chroma -- confirm
// against blend.frag), both at full size.
1031 if (split_ycbcr_output) {
1032 output_tex = pool.get_texture(GL_R8, width, height);
1033 output2_tex = pool.get_texture(GL_RG8, width, height);
1035 ScopedTimer timer("Blend", &total_timer);
1036 blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
// Non-split mode: a single GL_RGBA8 output; output2_tex stays 0.
1039 output_tex = pool.get_texture(GL_RGBA8, width, height);
1041 ScopedTimer timer("Blend", &total_timer);
1042 blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha);
1045 pool.release_texture(flow_tex);
1051 return make_pair(output_tex, output2_tex);
// Hands out a texture with the given format and dimensions.
//
// The pool is first searched, under the mutex, for an entry that is not in
// use, is not a renderbuffer, and matches format, width, height and
// num_layers exactly. New textures are created with a single mip level and
// immutable storage, then appended to the pool under the mutex.
//
//   format      - sized internal format, e.g. GL_RG16F.
//   width,
//   height      - size in pixels.
//   num_layers  - 0 requests a plain GL_TEXTURE_2D; the GL_TEXTURE_2D_ARRAY
//                 path allocates this many layers.
//
// NOTE(review): the lookup compares tex.width and tex.in_use, but these lines
// do not show either being assigned for a new entry -- confirm that both are set.
1054 GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
1057 lock_guard<mutex> lock(mu);
1058 for (Texture &tex : textures) {
1059 if (!tex.in_use && !tex.is_renderbuffer && tex.format == format &&
1060 tex.width == width && tex.height == height && tex.num_layers == num_layers) {
// The GL calls below do not touch the pool list.
1068 if (num_layers == 0) {
1069 glCreateTextures(GL_TEXTURE_2D, 1, &tex.tex_num);
1070 glTextureStorage2D(tex.tex_num, 1, format, width, height);
1072 glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex.tex_num);
1073 glTextureStorage3D(tex.tex_num, 1, format, width, height, num_layers);
// Record the properties that the lookup above matches on.
1075 tex.format = format;
1077 tex.height = height;
1078 tex.num_layers = num_layers;
1080 tex.is_renderbuffer = false;
1082 lock_guard<mutex> lock(mu);
1083 textures.push_back(tex);
// Hands out a renderbuffer with the given format and size.
//
// Renderbuffers share the pool list with textures and are told apart by
// is_renderbuffer. The pool is first searched, under the mutex, for an unused
// renderbuffer entry with the same format, width and height. New renderbuffers
// are created and appended to the pool under the mutex.
//
// NOTE(review): the lookup compares tex.width and tex.in_use, but these lines
// do not show either being assigned for a new entry -- confirm that both are set.
1088 GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height)
1091 lock_guard<mutex> lock(mu);
1092 for (Texture &tex : textures) {
1093 if (!tex.in_use && tex.is_renderbuffer && tex.format == format &&
1094 tex.width == width && tex.height == height) {
1102 glCreateRenderbuffers(1, &tex.tex_num);
1103 glNamedRenderbufferStorage(tex.tex_num, format, width, height);
// Record the properties that the lookup above matches on.
1105 tex.format = format;
1107 tex.height = height;
1109 tex.is_renderbuffer = true;
1111 lock_guard<mutex> lock(mu);
1112 textures.push_back(tex);
// Looks up the pool entry for the texture named tex_num, holding the pool
// mutex for the duration of the search. Renderbuffer entries are skipped,
// since their names can coincide with texture names; release_renderbuffer()
// is the counterpart for those.
1117 void TexturePool::release_texture(GLuint tex_num)
1119 lock_guard<mutex> lock(mu);
1120 for (Texture &tex : textures) {
1121 if (!tex.is_renderbuffer && tex.tex_num == tex_num) {
1130 void TexturePool::release_renderbuffer(GLuint tex_num)
1132 lock_guard<mutex> lock(mu);
1133 for (Texture &tex : textures) {
1134 if (tex.is_renderbuffer && tex.tex_num == tex_num) {