From 65c6584f77bff0af0c8e38d1ac90298bcd55e9ac Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 13 Nov 2017 21:58:40 +0100 Subject: [PATCH] Add support for compute shaders. This is currently pretty raw, and there are no effects using it yet, but it's a useful first step for getting the infrastructure in place. --- .gitignore | 2 + Makefile.in | 2 +- compute_shader_test.cpp | 68 +++++++++++++++++++ effect.h | 40 ++++++++++++ effect_chain.cpp | 140 +++++++++++++++++++++++++++++++++------- effect_chain.h | 14 ++++ footer.compute | 7 ++ header.compute | 23 +++++++ identity.compute | 9 +++ init.cpp | 12 +++- init.h | 4 ++ resource_pool.cpp | 69 ++++++++++++++++++-- resource_pool.h | 15 +++++ version.h | 2 +- 14 files changed, 376 insertions(+), 31 deletions(-) create mode 100644 compute_shader_test.cpp create mode 100644 footer.compute create mode 100644 header.compute create mode 100644 identity.compute diff --git a/.gitignore b/.gitignore index 8361137..65ecdd8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ perf.data *.dot demo effect_chain_test +compute_shader_test gamma_compression_effect_test gamma_expansion_effect_test alpha_multiplication_effect_test @@ -43,6 +44,7 @@ vignette_effect_test ycbcr_conversion_effect_test deinterlace_effect_test chain-*.frag +chain-*.compute movit.info coverage/ aclocal.m4 diff --git a/Makefile.in b/Makefile.in index c591dfd..02d40a7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -88,7 +88,7 @@ UNTESTED_EFFECTS += fft_input EFFECTS = $(TESTED_EFFECTS) $(UNTESTED_EFFECTS) # Unit tests.
-TESTS=effect_chain_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test) +TESTS=effect_chain_test compute_shader_test fp16_test $(TESTED_INPUTS:=_test) $(TESTED_EFFECTS:=_test) LIB_OBJS=effect_util.o util.o effect.o effect_chain.o init.o resource_pool.o ycbcr.o $(INPUTS:=.o) $(EFFECTS:=.o) diff --git a/compute_shader_test.cpp b/compute_shader_test.cpp new file mode 100644 index 0000000..7179f82 --- /dev/null +++ b/compute_shader_test.cpp @@ -0,0 +1,68 @@ +#include + +#include +#include + +#include "effect.h" +#include "flat_input.h" +#include "gtest/gtest.h" +#include "init.h" +#include "resource_pool.h" +#include "test_util.h" +#include "util.h" + +using namespace std; + +namespace movit { + +// An effect that does nothing. +class IdentityComputeEffect : public Effect { +public: + IdentityComputeEffect() {} + virtual string effect_type_id() const { return "IdentityComputeEffect"; } + virtual bool is_compute_shader() const { return true; } + string output_fragment_shader() { return read_file("identity.compute"); } +}; + +TEST(ComputeShaderTest, Identity) { + float data[] = { + 0.0f, 0.25f, 0.3f, + 0.75f, 1.0f, 1.0f, + }; + float out_data[6]; + EffectChainTester tester(data, 3, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + if (!movit_compute_shaders_supported) { + fprintf(stderr, "Skipping test; no support for compile shaders.\n"); + return; + } + tester.get_chain()->add_effect(new IdentityComputeEffect()); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + + expect_equal(data, out_data, 3, 2); +} + +// Like IdentityComputeEffect, but due to the alpha handling, this will be +// the very last effect in the chain, which means we can't output it directly +// to the screen. 
+class IdentityAlphaComputeEffect : public IdentityComputeEffect { + AlphaHandling alpha_handling() const { return DONT_CARE_ALPHA_TYPE; } +}; + +TEST(ComputeShaderTest, LastEffectInChain) { + float data[] = { + 0.0f, 0.25f, 0.3f, + 0.75f, 1.0f, 1.0f, + }; + float out_data[6]; + EffectChainTester tester(data, 3, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + if (!movit_compute_shaders_supported) { + fprintf(stderr, "Skipping test; no support for compile shaders.\n"); + return; + } + tester.get_chain()->add_effect(new IdentityAlphaComputeEffect()); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + + expect_equal(data, out_data, 3, 2); +} + +} // namespace movit diff --git a/effect.h b/effect.h index e10fab8..073780f 100644 --- a/effect.h +++ b/effect.h @@ -245,6 +245,45 @@ public: assert(false); } + // Whether this effect uses a compute shader instead of a regular fragment shader. + // Compute shaders are more flexible in that they can have multiple outputs + // for each invocation and also communicate between instances (by using shared + // memory within each group), but are not universally supported. The typical + // pattern would be to check movit_compute_shaders_supported and rewrite the + // graph to use a compute shader effect instead of a regular effect if it is + // available, in order to get better performance. Since compute shaders can reuse + // loads (again typically through shared memory), using needs_texture_bounce() + // is usually not needed, although it is allowed; the best candidates for compute + // shaders are typically those that sample many times from their input + // but can reuse those loads across neighboring instances. + // + // Compute shaders commonly work with unnormalized texture coordinates + // (where coordinates are integers [0..W) and [0..H)), whereas the rest + // of Movit, including any inputs you may want to sample from, works + // with normalized coordinates ([0..1)). 
Movit gives you uniforms + // PREFIX(inv_output_size) and PREFIX(output_texcoord_adjust) that you + // can use to transform unnormalized to normalized, as well as a macro + // NORMALIZE_TEXTURE_COORDS(vec2) that does it for you. + // + // Since compute shaders have flexible output, it is difficult to chain other + // effects after them in the same phase, and thus, they will always be last. + // (This limitation may be lifted for the special case of one-to-one effects + // in the future.) Furthermore, they cannot write to the framebuffer, just to + // textures, so Movit may have to insert an extra phase just to do the output + // from a texture to the screen in some cases. However, this is transparent + // to both the effect and the user. + virtual bool is_compute_shader() const { return false; } + + // For a compute shader (see the previous member function), what dimensions + // it should be invoked over. Called every frame, before uniforms are set + // (so you are allowed to update uniforms based from this call). + virtual void get_compute_dimensions(unsigned output_width, unsigned output_height, + unsigned *x, unsigned *y, unsigned *z) const { + *x = output_width; + *y = output_height; + *z = 1; + } + // Tells the effect the resolution of each of its input. // This will be called every frame, and always before get_output_size(), // so you can change your output size based on the input if so desired. @@ -364,6 +403,7 @@ private: std::map params_vec4; // Picked out by EffectChain during finalization. + std::vector > uniforms_image2d; std::vector > uniforms_sampler2d; std::vector > uniforms_bool; std::vector > uniforms_int; diff --git a/effect_chain.cpp b/effect_chain.cpp index 65c02c2..a9d9e1e 100644 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@ -32,6 +32,18 @@ using namespace std; namespace movit { +namespace { + +// An effect that does nothing. 
+class IdentityEffect : public Effect { +public: + IdentityEffect() {} + virtual string effect_type_id() const { return "IdentityEffect"; } + string output_fragment_shader() { return read_file("identity.frag"); } +}; + +} // namespace + EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool) : aspect_nom(aspect_nom), aspect_denom(aspect_denom), @@ -347,7 +359,12 @@ void collect_uniform_locations(GLuint glsl_program_num, vector > *pha void EffectChain::compile_glsl_program(Phase *phase) { - string frag_shader_header = read_version_dependent_file("header", "frag"); + string frag_shader_header; + if (phase->is_compute_shader) { + frag_shader_header = read_file("header.compute"); + } else { + frag_shader_header = read_version_dependent_file("header", "frag"); + } string frag_shader = ""; // Create functions and uniforms for all the texture inputs that we need. @@ -402,6 +419,9 @@ void EffectChain::compile_glsl_program(Phase *phase) frag_shader += "\n"; frag_shader += string("#define FUNCNAME ") + effect_id + "\n"; + if (node->effect->is_compute_shader()) { + frag_shader += string("#define NORMALIZE_TEXTURE_COORDS(tc) ((tc) * ") + effect_id + "_inv_output_size + " + effect_id + "_output_texcoord_adjust)\n"; + } frag_shader += replace_prefix(node->effect->output_fragment_shader(), effect_id); frag_shader += "#undef FUNCNAME\n"; if (node->incoming_links.size() == 1) { @@ -479,7 +499,13 @@ void EffectChain::compile_glsl_program(Phase *phase) } } - frag_shader.append(read_file("footer.frag")); + if (phase->is_compute_shader) { + frag_shader.append(read_file("footer.compute")); + phase->output_node->effect->register_uniform_vec2("inv_output_size", (float *)&phase->inv_output_size); + phase->output_node->effect->register_uniform_vec2("output_texcoord_adjust", (float *)&phase->output_texcoord_adjust); + } else { + frag_shader.append(read_file("footer.frag")); + } // Collect uniforms from all effects and output them. 
Note that this needs // to happen after output_fragment_shader(), even though the uniforms come @@ -492,6 +518,7 @@ void EffectChain::compile_glsl_program(Phase *phase) Node *node = phase->effects[i]; Effect *effect = node->effect; const string effect_id = phase->effect_ids[node]; + extract_uniform_declarations(effect->uniforms_image2d, "image2D", effect_id, &phase->uniforms_image2d, &frag_shader_uniforms); extract_uniform_declarations(effect->uniforms_sampler2d, "sampler2D", effect_id, &phase->uniforms_sampler2d, &frag_shader_uniforms); extract_uniform_declarations(effect->uniforms_bool, "bool", effect_id, &phase->uniforms_bool, &frag_shader_uniforms); extract_uniform_declarations(effect->uniforms_int, "int", effect_id, &phase->uniforms_int, &frag_shader_uniforms); @@ -520,7 +547,19 @@ void EffectChain::compile_glsl_program(Phase *phase) vert_shader[pos + needle.size() - 1] = '1'; } - phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs); + if (phase->is_compute_shader) { + phase->glsl_program_num = resource_pool->compile_glsl_compute_program(frag_shader); + + Uniform uniform; + uniform.name = "outbuf"; + uniform.value = &phase->outbuf_image_unit; + uniform.prefix = "tex"; + uniform.num_values = 1; + uniform.location = -1; + phase->uniforms_image2d.push_back(uniform); + } else { + phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs); + } GLint position_attribute_index = glGetAttribLocation(phase->glsl_program_num, "position"); GLint texcoord_attribute_index = glGetAttribLocation(phase->glsl_program_num, "texcoord"); if (position_attribute_index != -1) { @@ -531,6 +570,7 @@ void EffectChain::compile_glsl_program(Phase *phase) } // Collect the resulting location numbers for each uniform. 
+ collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_image2d); collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_sampler2d); collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_bool); collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_int); @@ -557,6 +597,7 @@ Phase *EffectChain::construct_phase(Node *output, map *complete Phase *phase = new Phase; phase->output_node = output; + phase->is_compute_shader = output->effect->is_compute_shader(); // If the output effect has one-to-one sampling, we try to trace this // status down through the dependency chain. This is important in case @@ -603,6 +644,12 @@ Phase *EffectChain::construct_phase(Node *output, map *complete start_new_phase = true; } + // Compute shaders currently always end phases. + // (We might loosen this up in some cases in the future.) + if (deps[i]->effect->is_compute_shader()) { + start_new_phase = true; + } + // Propagate information about needing mipmaps down the chain, // breaking the phase if we notice an incompatibility. // @@ -1666,6 +1713,21 @@ void EffectChain::add_dither_if_needed() dither_effect = dither->effect; } +// Compute shaders can't output to the framebuffer, so if the last +// phase ends in a compute shader, add a dummy phase at the end that +// only blits directly from the temporary texture. +// +// TODO: Add an API for rendering directly to textures, for the cases +// where we're only rendering to an FBO anyway. +void EffectChain::add_dummy_effect_if_needed() +{ + Node *output = find_output_node(); + if (output->effect->is_compute_shader()) { + Node *dummy = add_node(new IdentityEffect()); + connect_nodes(output, dummy); + } +} + // Find the output node. This is, simply, one that has no outgoing links. // If there are multiple ones, the graph is malformed (we do not support // multiple outputs right now). 
@@ -1737,7 +1799,10 @@ void EffectChain::finalize() output_dot("step18-before-dither.dot"); add_dither_if_needed(); - output_dot("step19-final.dot"); + output_dot("step19-before-dummy-effect.dot"); + add_dummy_effect_if_needed(); + + output_dot("step20-final.dot"); // Construct all needed GLSL programs, starting at the output. // We need to keep track of which effects have already been computed, @@ -1746,7 +1811,7 @@ void EffectChain::finalize() map completed_effects; construct_phase(find_output_node(), &completed_effects); - output_dot("step20-split-to-phases.dot"); + output_dot("step21-split-to-phases.dot"); assert(phases[0]->inputs.empty()); @@ -1948,16 +2013,29 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, phase->input_samplers[sampler] = sampler; // Bind the sampler to the right uniform. } - // And now the output. (Already set up for us if it is the last phase.) - if (!last_phase) { - fbo = resource_pool->create_fbo((*output_textures)[phase]); - glBindFramebuffer(GL_FRAMEBUFFER, fbo); - glViewport(0, 0, phase->output_width, phase->output_height); - } - GLuint instance_program_num = resource_pool->use_glsl_program(phase->glsl_program_num); check_error(); + // And now the output. + if (phase->is_compute_shader) { + // This is currently the only place where we use image units, + // so we can always use 0. + phase->outbuf_image_unit = 0; + glBindImageTexture(phase->outbuf_image_unit, (*output_textures)[phase], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F); + check_error(); + phase->inv_output_size.x = 1.0f / phase->output_width; + phase->inv_output_size.y = 1.0f / phase->output_height; + phase->output_texcoord_adjust.x = 0.5f / phase->output_width; + phase->output_texcoord_adjust.y = 0.5f / phase->output_height; + } else { + // (Already set up for us if it is the last phase.) 
+ if (!last_phase) { + fbo = resource_pool->create_fbo((*output_textures)[phase]); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glViewport(0, 0, phase->output_width, phase->output_height); + } + } + // Give the required parameters to all the effects. unsigned sampler_num = phase->inputs.size(); for (unsigned i = 0; i < phase->effects.size(); ++i) { @@ -1974,16 +2052,29 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, } } - // Uniforms need to come after set_gl_state(), since they can be updated - // from there. - setup_uniforms(phase); - // Bind the vertex data. - GLuint vao = resource_pool->create_vec2_vao(phase->attribute_indexes, vbo); - glBindVertexArray(vao); + if (phase->is_compute_shader) { + unsigned x, y, z; + phase->output_node->effect->get_compute_dimensions(phase->output_width, phase->output_height, &x, &y, &z); - glDrawArrays(GL_TRIANGLES, 0, 3); - check_error(); + // Uniforms need to come after set_gl_state() _and_ get_compute_dimensions(), + // since they can be updated from there. + setup_uniforms(phase); + glDispatchCompute(x, y, z); + } else { + // Uniforms need to come after set_gl_state(), since they can be updated + // from there. + setup_uniforms(phase); + + // Bind the vertex data. + GLuint vao = resource_pool->create_vec2_vao(phase->attribute_indexes, vbo); + glBindVertexArray(vao); + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + resource_pool->release_vec2_vao(vao); + } for (unsigned i = 0; i < phase->effects.size(); ++i) { Node *node = phase->effects[i]; @@ -1991,9 +2082,8 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, } resource_pool->unuse_glsl_program(instance_program_num); - resource_pool->release_vec2_vao(vao); - if (!last_phase) { + if (!last_phase && !phase->is_compute_shader) { resource_pool->release_fbo(fbo); } } @@ -2001,6 +2091,12 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, void EffectChain::setup_uniforms(Phase *phase) { // TODO: Use UBO blocks. 
+ for (size_t i = 0; i < phase->uniforms_image2d.size(); ++i) { + const Uniform &uniform = phase->uniforms_image2d[i]; + if (uniform.location != -1) { + glUniform1iv(uniform.location, uniform.num_values, uniform.value); + } + } for (size_t i = 0; i < phase->uniforms_sampler2d.size(); ++i) { const Uniform &uniform = phase->uniforms_sampler2d[i]; if (uniform.location != -1) { diff --git a/effect_chain.h b/effect_chain.h index 6d9d062..c292b38 100644 --- a/effect_chain.h +++ b/effect_chain.h @@ -178,11 +178,24 @@ struct Phase { std::vector effects; // In order. unsigned output_width, output_height, virtual_output_width, virtual_output_height; + // Whether this phase is compiled as a compute shader, i.e., the last effect is + // marked as one. + bool is_compute_shader; + + // If is_compute_shader, which image unit the output buffer is bound to. + // This is used as source for a Uniform below. + int outbuf_image_unit; + + // These are used in transforming from unnormalized to normalized coordinates + // in compute shaders. + Point2D inv_output_size, output_texcoord_adjust; + // Identifier used to create unique variables in GLSL. // Unique per-phase to increase cacheability of compiled shaders. std::map effect_ids; // Uniforms for this phase; combined from all the effects. + std::vector > uniforms_image2d; std::vector > uniforms_sampler2d; std::vector > uniforms_bool; std::vector > uniforms_int; @@ -499,6 +512,7 @@ private: void fix_output_gamma(); void add_ycbcr_conversion_if_needed(); void add_dither_if_needed(); + void add_dummy_effect_if_needed(); float aspect_nom, aspect_denom; ImageFormat output_format; diff --git a/footer.compute b/footer.compute new file mode 100644 index 0000000..1baa856 --- /dev/null +++ b/footer.compute @@ -0,0 +1,7 @@ +// GLSL is pickier than the C++ preprocessor in if-testing for undefined +// tokens; do some fixups here to keep it happy.
+ +void main() +{ + INPUT(); +} diff --git a/header.compute b/header.compute new file mode 100644 index 0000000..5487b20 --- /dev/null +++ b/header.compute @@ -0,0 +1,23 @@ +#version 130 +#extension GL_ARB_compute_shader : enable +#extension GL_ARB_shader_image_load_store : enable + +// FIXME this needs to be auto-output or something +layout(rgba16f) uniform restrict writeonly image2D outbuf; + +vec4 tex2D(sampler2D s, vec2 coord) +{ + return texture(s, coord); +} + +void cs_output(uvec2 coord, vec4 val) +{ + imageStore(outbuf, ivec2(coord), val); +} + +void cs_output(ivec2 coord, vec4 val) +{ + imageStore(outbuf, coord, val); +} + +#define OUTPUT(tc, val) cs_output(tc, val) diff --git a/identity.compute b/identity.compute new file mode 100644 index 0000000..9f741bf --- /dev/null +++ b/identity.compute @@ -0,0 +1,9 @@ +// Identity compute shader (sometimes useful to do nothing). + +layout(local_size_x = 1) in; + +void FUNCNAME() +{ + vec4 val = INPUT(NORMALIZE_TEXTURE_COORDS(gl_GlobalInvocationID.xy)); + OUTPUT(gl_GlobalInvocationID.xy, val); +} diff --git a/init.cpp b/init.cpp index d312cd8..7f2f5cc 100644 --- a/init.cpp +++ b/init.cpp @@ -15,7 +15,7 @@ namespace movit { bool movit_initialized = false; MovitDebugLevel movit_debug_level = MOVIT_DEBUG_ON; float movit_texel_subpixel_precision; -bool movit_timer_queries_supported; +bool movit_timer_queries_supported, movit_compute_shaders_supported; int movit_num_wrongly_rounded; MovitShaderModel movit_shader_model; @@ -310,6 +310,16 @@ bool check_extensions() movit_timer_queries_supported = (epoxy_gl_version() >= 33 || epoxy_has_gl_extension("GL_ARB_timer_query")); + // Certain effects have compute shader implementations, which may be + // more efficient than the normal fragment shader versions. + // GLSL ES 3.10 supposedly also has compute shaders, but I haven't tested them, + // so we require desktop OpenGL.
+ movit_compute_shaders_supported = + (epoxy_is_desktop_gl() && + (epoxy_gl_version() >= 43 || + (epoxy_has_gl_extension("GL_ARB_compute_shader") && + epoxy_has_gl_extension("GL_ARB_shader_image_load_store")))); + return true; } diff --git a/init.h b/init.h index a644435..2305522 100644 --- a/init.h +++ b/init.h @@ -67,6 +67,10 @@ extern int movit_num_wrongly_rounded; // Whether the OpenGL driver (or GPU) in use supports GL_ARB_timer_query. extern bool movit_timer_queries_supported; +// Whether the OpenGL driver (or GPU) in use supports compute shaders. +// Note that certain OpenGL implementations might only allow this in core mode. +extern bool movit_compute_shaders_supported; + // What shader model we are compiling for. This only affects the choice // of a few files (like header.frag); most of the shaders are the same. enum MovitShaderModel { diff --git a/resource_pool.cpp b/resource_pool.cpp index 46592d0..b9adda7 100644 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@ -210,6 +210,56 @@ void ResourcePool::release_glsl_program(GLuint glsl_program_num) pthread_mutex_unlock(&lock); } +GLuint ResourcePool::compile_glsl_compute_program(const string& compute_shader) +{ + GLuint glsl_program_num; + pthread_mutex_lock(&lock); + + const string &key = compute_shader; + if (compute_programs.count(key)) { + // Already in the cache. + glsl_program_num = compute_programs[key]; + increment_program_refcount(glsl_program_num); + } else { + // Not in the cache. Compile the shader. 
+ GLuint cs_obj = compile_shader(compute_shader, GL_COMPUTE_SHADER); + check_error(); + glsl_program_num = link_compute_program(cs_obj); + + output_debug_shader(compute_shader, "compute"); + + compute_programs.insert(make_pair(key, glsl_program_num)); + add_master_program(glsl_program_num); + + ComputeShaderSpec spec; + spec.cs_obj = cs_obj; + compute_program_shaders.insert(make_pair(glsl_program_num, spec)); + } + pthread_mutex_unlock(&lock); + return glsl_program_num; +} + +GLuint ResourcePool::link_compute_program(GLuint cs_obj) +{ + GLuint glsl_program_num = glCreateProgram(); + check_error(); + glAttachShader(glsl_program_num, cs_obj); + check_error(); + glLinkProgram(glsl_program_num); + check_error(); + + GLint success; + glGetProgramiv(glsl_program_num, GL_LINK_STATUS, &success); + if (success == GL_FALSE) { + GLchar error_log[1024] = {0}; + glGetProgramInfoLog(glsl_program_num, 1024, NULL, error_log); + fprintf(stderr, "Error linking program: %s\n", error_log); + exit(1); + } + + return glsl_program_num; +} + GLuint ResourcePool::use_glsl_program(GLuint glsl_program_num) { pthread_mutex_lock(&lock); @@ -226,12 +276,19 @@ GLuint ResourcePool::use_glsl_program(GLuint glsl_program_num) // will later put it onto the list.) map::iterator shader_it = program_shaders.find(glsl_program_num); - assert(shader_it != program_shaders.end()); - - instance_program_num = link_program( - shader_it->second.vs_obj, - shader_it->second.fs_obj, - shader_it->second.fragment_shader_outputs); + if (shader_it == program_shaders.end()) { + // Should be a compute shader. + map::iterator compute_shader_it = + compute_program_shaders.find(glsl_program_num); + instance_program_num = link_compute_program( + compute_shader_it->second.cs_obj); + } else { + // A regular fragment shader. 
+ instance_program_num = link_program( + shader_it->second.vs_obj, + shader_it->second.fs_obj, + shader_it->second.fragment_shader_outputs); + } program_masters.insert(make_pair(instance_program_num, glsl_program_num)); } pthread_mutex_unlock(&lock); diff --git a/resource_pool.h b/resource_pool.h index 35b3b3b..2324abc 100644 --- a/resource_pool.h +++ b/resource_pool.h @@ -70,6 +70,11 @@ public: const std::vector& frag_shader_outputs); void release_glsl_program(GLuint glsl_program_num); + // Same as the previous, but for compute shaders instead. There is currently + // no support for binding multiple outputs. + GLuint compile_glsl_compute_program(const std::string& compile_shader); + void release_glsl_compute_program(GLuint glsl_program_num); + // Since uniforms belong to the program and not to the context, // a given GLSL program number can't be used by more than one thread // at a time. Thus, if two threads want to use the same program @@ -157,6 +162,8 @@ private: GLuint fs_obj, const std::vector& fragment_shader_outputs); + static GLuint link_compute_program(GLuint cs_obj); + // Protects all the other elements in the class. pthread_mutex_t lock; @@ -165,6 +172,9 @@ private: // A mapping from vertex/fragment shader source strings to compiled program number. std::map, GLuint> programs; + // A mapping from compute shader source string to compiled program number. + std::map compute_programs; + // A mapping from compiled program number to number of current users. // Once this reaches zero, the program is taken out of this map and instead // put on the freelist (after which it may be deleted). @@ -178,6 +188,11 @@ private: }; std::map program_shaders; + struct ComputeShaderSpec { + GLuint cs_obj; + }; + std::map compute_program_shaders; + // For each program, a list of other programs that are exactly like it. // By default, will only contain the program itself, but due to cloning // (see use_glsl_program()), may grow.
Programs are taken off this list diff --git a/version.h b/version.h index 57e4645..64469bd 100644 --- a/version.h +++ b/version.h @@ -5,6 +5,6 @@ // changes, even within git versions. There is no specific version // documentation outside the regular changelogs, though. -#define MOVIT_VERSION 31 +#define MOVIT_VERSION 32 #endif // !defined(_MOVIT_VERSION_H) -- 2.39.2