X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=flow.cpp;h=5125d26b53a5d2f09bde21132136d2a6ec333b5c;hb=3795723be95f2fe82f3c8b8b45b1a905b2c811fd;hp=873bdf294b1143aaf6eec681419419d5c513e844;hpb=6175012d6d15e68a59c535a7dec65884d7e1bfa6;p=nageru diff --git a/flow.cpp b/flow.cpp index 873bdf2..5125d26 100644 --- a/flow.cpp +++ b/flow.cpp @@ -1,21 +1,22 @@ #define NO_SDL_GLEXT 1 -#include - -#include -#include -#include -#include - #include "flow.h" + +#include "embedded_files.h" #include "gpu_timers.h" #include "util.h" #include +#include #include -#include +#include +#include #include +#include #include +#include +#include +#include #include #define BUFFER_OFFSET(i) ((char *)nullptr + (i)) @@ -49,10 +50,18 @@ int find_num_levels(int width, int height) return levels; } -string read_file(const string &filename) +string read_file(const string &filename, const unsigned char *start = nullptr, const size_t size = 0) { FILE *fp = fopen(filename.c_str(), "r"); if (fp == nullptr) { + // Fall back to the version we compiled in. (We prefer disk if we can, + // since that makes it possible to work on shaders without recompiling + // all the time.) + if (start != nullptr) { + return string(reinterpret_cast(start), + reinterpret_cast(start) + size); + } + perror(filename.c_str()); exit(1); } @@ -63,7 +72,7 @@ string read_file(const string &filename) exit(1); } - int size = ftell(fp); + int disk_size = ftell(fp); ret = fseek(fp, 0, SEEK_SET); if (ret == -1) { @@ -72,15 +81,15 @@ string read_file(const string &filename) } string str; - str.resize(size); - ret = fread(&str[0], size, 1, fp); + str.resize(disk_size); + ret = fread(&str[0], disk_size, 1, fp); if (ret == -1) { perror("fread"); exit(1); } if (ret == 0) { fprintf(stderr, "Short read when trying to read %d bytes from %s\n", - size, filename.c_str()); + disk_size, filename.c_str()); exit(1); } fclose(fp); @@ -88,11 +97,10 @@ string read_file(const string &filename) return str; } - GLuint compile_shader(const string &shader_src, GLenum type) { GLuint obj = glCreateShader(type); - const GLchar* source[] = { shader_src.data() }; + const GLchar *source[] = { shader_src.data() }; const GLint length[] = { (GLint)shader_src.size() }; glShaderSource(obj, 1, source, length); glCompileShader(obj); @@ -204,8 +212,8 @@ void PersistentFBOSetWithDepth::render_to(GLuint depth_rb, const a GrayscaleConversion::GrayscaleConversion() { - gray_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - gray_fs_obj = compile_shader(read_file("gray.frag"), GL_FRAGMENT_SHADER); + gray_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + gray_fs_obj = compile_shader(read_file("gray.frag", _binary_gray_frag_data, _binary_gray_frag_size), GL_FRAGMENT_SHADER); gray_program = link_program(gray_vs_obj, gray_fs_obj); // Set up the VAO containing all the required position/texcoord data. @@ -233,8 +241,8 @@ void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, Sobel::Sobel() { - sobel_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - sobel_fs_obj = compile_shader(read_file("sobel.frag"), GL_FRAGMENT_SHADER); + sobel_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + sobel_fs_obj = compile_shader(read_file("sobel.frag", _binary_sobel_frag_data, _binary_sobel_frag_size), GL_FRAGMENT_SHADER); sobel_program = link_program(sobel_vs_obj, sobel_fs_obj); uniform_tex = glGetUniformLocation(sobel_program, "tex"); @@ -254,8 +262,8 @@ void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_heig MotionSearch::MotionSearch(const OperatingPoint &op) : op(op) { - motion_vs_obj = compile_shader(read_file("motion_search.vert"), GL_VERTEX_SHADER); - motion_fs_obj = compile_shader(read_file("motion_search.frag"), GL_FRAGMENT_SHADER); + motion_vs_obj = compile_shader(read_file("motion_search.vert", _binary_motion_search_vert_data, _binary_motion_search_vert_size), GL_VERTEX_SHADER); + motion_fs_obj = compile_shader(read_file("motion_search.frag", _binary_motion_search_frag_data, _binary_motion_search_frag_size), GL_FRAGMENT_SHADER); motion_search_program = link_program(motion_vs_obj, motion_fs_obj); uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size"); @@ -290,8 +298,8 @@ void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuin Densify::Densify(const OperatingPoint &op) : op(op) { - densify_vs_obj = compile_shader(read_file("densify.vert"), GL_VERTEX_SHADER); - densify_fs_obj = compile_shader(read_file("densify.frag"), GL_FRAGMENT_SHADER); + densify_vs_obj = compile_shader(read_file("densify.vert", _binary_densify_vert_data, _binary_densify_vert_size), GL_VERTEX_SHADER); + densify_fs_obj = compile_shader(read_file("densify.frag", _binary_densify_frag_data, _binary_densify_frag_size), GL_FRAGMENT_SHADER); densify_program = link_program(densify_vs_obj, densify_fs_obj); uniform_patch_size = glGetUniformLocation(densify_program, "patch_size"); @@ -321,8 +329,8 @@ void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int Prewarp::Prewarp() { - prewarp_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - prewarp_fs_obj = compile_shader(read_file("prewarp.frag"), GL_FRAGMENT_SHADER); + prewarp_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + prewarp_fs_obj = compile_shader(read_file("prewarp.frag", _binary_prewarp_frag_data, _binary_prewarp_frag_size), GL_FRAGMENT_SHADER); prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj); uniform_image_tex = glGetUniformLocation(prewarp_program, "image_tex"); @@ -344,8 +352,8 @@ void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_te Derivatives::Derivatives() { - derivatives_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - derivatives_fs_obj = compile_shader(read_file("derivatives.frag"), GL_FRAGMENT_SHADER); + derivatives_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + derivatives_fs_obj = compile_shader(read_file("derivatives.frag", _binary_derivatives_frag_data, _binary_derivatives_frag_size), GL_FRAGMENT_SHADER); derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj); uniform_tex = glGetUniformLocation(derivatives_program, "tex"); @@ -365,8 +373,8 @@ void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, in ComputeDiffusivity::ComputeDiffusivity() { - diffusivity_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag"), GL_FRAGMENT_SHADER); + diffusivity_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag", _binary_diffusivity_frag_data, _binary_diffusivity_frag_size), GL_FRAGMENT_SHADER); diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj); uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex"); @@ -393,8 +401,8 @@ void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diff SetupEquations::SetupEquations() { - equations_vs_obj = compile_shader(read_file("equations.vert"), GL_VERTEX_SHADER); - equations_fs_obj = compile_shader(read_file("equations.frag"), GL_FRAGMENT_SHADER); + equations_vs_obj = compile_shader(read_file("equations.vert", _binary_equations_vert_data, _binary_equations_vert_size), GL_VERTEX_SHADER); + equations_fs_obj = compile_shader(read_file("equations.frag", _binary_equations_frag_data, _binary_equations_frag_size), GL_FRAGMENT_SHADER); equations_program = link_program(equations_vs_obj, equations_fs_obj); uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex"); @@ -430,8 +438,8 @@ void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex SOR::SOR() { - sor_vs_obj = compile_shader(read_file("sor.vert"), GL_VERTEX_SHADER); - sor_fs_obj = compile_shader(read_file("sor.frag"), GL_FRAGMENT_SHADER); + sor_vs_obj = compile_shader(read_file("sor.vert", _binary_sor_vert_data, _binary_sor_vert_size), GL_VERTEX_SHADER); + sor_fs_obj = compile_shader(read_file("sor.frag", _binary_sor_frag_data, _binary_sor_frag_size), GL_FRAGMENT_SHADER); sor_program = link_program(sor_vs_obj, sor_fs_obj); uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex"); @@ -492,8 +500,8 @@ void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_bl AddBaseFlow::AddBaseFlow() { - add_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag"), GL_FRAGMENT_SHADER); + add_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag", _binary_add_base_flow_frag_data, _binary_add_base_flow_frag_size), GL_FRAGMENT_SHADER); add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj); uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex"); @@ -515,8 +523,8 @@ void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_wid ResizeFlow::ResizeFlow() { - resize_flow_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag"), GL_FRAGMENT_SHADER); + resize_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag", _binary_resize_flow_frag_data, _binary_resize_flow_frag_size), GL_FRAGMENT_SHADER); resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj); uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex"); @@ -772,22 +780,22 @@ GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStra Splat::Splat(const OperatingPoint &op) : op(op) { - splat_vs_obj = compile_shader(read_file("splat.vert"), GL_VERTEX_SHADER); - splat_fs_obj = compile_shader(read_file("splat.frag"), GL_FRAGMENT_SHADER); + splat_vs_obj = compile_shader(read_file("splat.vert", _binary_splat_vert_data, _binary_splat_vert_size), GL_VERTEX_SHADER); + splat_fs_obj = compile_shader(read_file("splat.frag", _binary_splat_frag_data, _binary_splat_frag_size), GL_FRAGMENT_SHADER); splat_program = link_program(splat_vs_obj, splat_fs_obj); uniform_splat_size = glGetUniformLocation(splat_program, "splat_size"); uniform_alpha = glGetUniformLocation(splat_program, "alpha"); - uniform_image_tex = glGetUniformLocation(splat_program, "image_tex"); + uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex"); uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex"); uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size"); } -void Splat::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha) +void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha) { glUseProgram(splat_program); - bind_sampler(splat_program, uniform_image_tex, 0, image_tex, linear_sampler); + bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler); bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler); glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height); @@ -815,8 +823,8 @@ void Splat::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_te HoleFill::HoleFill() { - fill_vs_obj = compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER); - fill_fs_obj = compile_shader(read_file("hole_fill.frag"), GL_FRAGMENT_SHADER); + fill_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER); + fill_fs_obj = compile_shader(read_file("hole_fill.frag", _binary_hole_fill_frag_data, _binary_hole_fill_frag_size), GL_FRAGMENT_SHADER); fill_program = link_program(fill_vs_obj, fill_fs_obj); uniform_tex = glGetUniformLocation(fill_program, "tex"); @@ -879,8 +887,8 @@ void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int wi HoleBlend::HoleBlend() { - blend_vs_obj = compile_shader(read_file("hole_fill.vert"), GL_VERTEX_SHADER); // Reuse the vertex shader from the fill. - blend_fs_obj = compile_shader(read_file("hole_blend.frag"), GL_FRAGMENT_SHADER); + blend_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER); // Reuse the vertex shader from the fill. + blend_fs_obj = compile_shader(read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size), GL_FRAGMENT_SHADER); blend_program = link_program(blend_vs_obj, blend_fs_obj); uniform_left_tex = glGetUniformLocation(blend_program, "left_tex"); @@ -915,10 +923,19 @@ void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int w glDisable(GL_DEPTH_TEST); } -Blend::Blend() +Blend::Blend(bool split_ycbcr_output) + : split_ycbcr_output(split_ycbcr_output) { - blend_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER); - blend_fs_obj = compile_shader(read_file("blend.frag"), GL_FRAGMENT_SHADER); + string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size); + if (split_ycbcr_output) { + // Insert after the first #version line. + size_t offset = frag_shader.find('\n'); + assert(offset != string::npos); + frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1); + } + + blend_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER); + blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER); blend_program = link_program(blend_vs_obj, blend_fs_obj); uniform_image_tex = glGetUniformLocation(blend_program, "image_tex"); @@ -927,7 +944,7 @@ Blend::Blend() uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance"); } -void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int level_width, int level_height, float alpha) +void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha) { glUseProgram(blend_program); bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler); @@ -935,13 +952,20 @@ void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int level glProgramUniform1f(blend_program, uniform_alpha, alpha); glViewport(0, 0, level_width, level_height); - fbos.render_to(output_tex); + if (split_ycbcr_output) { + fbos_split.render_to(output_tex, output2_tex); + } else { + fbos.render_to(output_tex); + } glDisable(GL_BLEND); // A bit ironic, perhaps. glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -Interpolate::Interpolate(int width, int height, const OperatingPoint &op) - : width(width), height(height), flow_level(op.finest_level), op(op), splat(op) { +Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output) + : flow_level(op.finest_level), + split_ycbcr_output(split_ycbcr_output), + splat(op), + blend(split_ycbcr_output) { // Set up the vertex data that will be shared between all passes. float vertices[] = { 0.0f, 1.0f, @@ -961,7 +985,7 @@ Interpolate::Interpolate(int width, int height, const OperatingPoint &op) glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); } -GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha) +pair Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha) { GPUTimers timers; @@ -973,7 +997,7 @@ GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint // Pick out the right level to test splatting results on. GLuint tex_view; glGenTextures(1, &tex_view); - glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, image_tex, GL_RGBA8, flow_level, 1, 0, 2); + glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2); int flow_width = width >> flow_level; int flow_height = height >> flow_level; @@ -1003,10 +1027,20 @@ GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint pool.release_texture(temp_tex[2]); pool.release_renderbuffer(depth_rb); - GLuint output_tex = pool.get_texture(GL_RGBA8, width, height); - { - ScopedTimer timer("Blend", &total_timer); - blend.exec(image_tex, flow_tex, output_tex, width, height, alpha); + GLuint output_tex, output2_tex = 0; + if (split_ycbcr_output) { + output_tex = pool.get_texture(GL_R8, width, height); + output2_tex = pool.get_texture(GL_RG8, width, height); + { + ScopedTimer timer("Blend", &total_timer); + blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha); + } + } else { + output_tex = pool.get_texture(GL_RGBA8, width, height); + { + ScopedTimer timer("Blend", &total_timer); + blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha); + } } pool.release_texture(flow_tex); total_timer.end(); @@ -1014,16 +1048,19 @@ GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint timers.print(); } - return output_tex; + return make_pair(output_tex, output2_tex); } GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers) { - for (Texture &tex : textures) { - if (!tex.in_use && !tex.is_renderbuffer && tex.format == format && - tex.width == width && tex.height == height && tex.num_layers == num_layers) { - tex.in_use = true; - return tex.tex_num; + { + lock_guard lock(mu); + for (Texture &tex : textures) { + if (!tex.in_use && !tex.is_renderbuffer && tex.format == format && + tex.width == width && tex.height == height && tex.num_layers == num_layers) { + tex.in_use = true; + return tex.tex_num; + } } } @@ -1041,17 +1078,23 @@ GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLui tex.num_layers = num_layers; tex.in_use = true; tex.is_renderbuffer = false; - textures.push_back(tex); + { + lock_guard lock(mu); + textures.push_back(tex); + } return tex.tex_num; } GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height) { - for (Texture &tex : textures) { - if (!tex.in_use && tex.is_renderbuffer && tex.format == format && - tex.width == width && tex.height == height) { - tex.in_use = true; - return tex.tex_num; + { + lock_guard lock(mu); + for (Texture &tex : textures) { + if (!tex.in_use && tex.is_renderbuffer && tex.format == format && + tex.width == width && tex.height == height) { + tex.in_use = true; + return tex.tex_num; + } } } @@ -1064,12 +1107,16 @@ GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height) tex.height = height; tex.in_use = true; tex.is_renderbuffer = true; - textures.push_back(tex); + { + lock_guard lock(mu); + textures.push_back(tex); + } return tex.tex_num; } void TexturePool::release_texture(GLuint tex_num) { + lock_guard lock(mu); for (Texture &tex : textures) { if (!tex.is_renderbuffer && tex.tex_num == tex_num) { assert(tex.in_use); @@ -1082,6 +1129,7 @@ void TexturePool::release_texture(GLuint tex_num) void TexturePool::release_renderbuffer(GLuint tex_num) { + lock_guard lock(mu); for (Texture &tex : textures) { if (tex.is_renderbuffer && tex.tex_num == tex_num) { assert(tex.in_use);