git.sesse.net Git - movit/blobdiff - effect_chain.cpp
Support rendering compute shaders straight to textures (skipping the dummy phase).
[movit] / effect_chain.cpp
index a9d9e1e2c437da605bc2997b20f824ce6ca64ee6..4afe415754fa40dc5486c182ca6377c76e48f5ca 100644 (file)
@@ -49,8 +49,8 @@ EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *res
          aspect_denom(aspect_denom),
          output_color_rgba(false),
          num_output_color_ycbcr(0),
-         dither_effect(NULL),
-         ycbcr_conversion_effect_node(NULL),
+         dither_effect(nullptr),
+         ycbcr_conversion_effect_node(nullptr),
          intermediate_format(GL_RGBA16F),
          intermediate_transformation(NO_FRAMEBUFFER_TRANSFORMATION),
          num_dither_bits(0),
@@ -58,7 +58,7 @@ EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *res
          finalized(false),
          resource_pool(resource_pool),
          do_phase_timing(false) {
-       if (resource_pool == NULL) {
+       if (resource_pool == nullptr) {
                this->resource_pool = new ResourcePool();
                owns_resource_pool = true;
        } else {
@@ -311,10 +311,10 @@ string replace_prefix(const string &text, const string &prefix)
 namespace {
 
 template<class T>
-void extract_uniform_declarations(const vector<Uniform<T> > &effect_uniforms,
+void extract_uniform_declarations(const vector<Uniform<T>> &effect_uniforms,
                                   const string &type_specifier,
                                   const string &effect_id,
-                                  vector<Uniform<T> > *phase_uniforms,
+                                  vector<Uniform<T>> *phase_uniforms,
                                   string *glsl_string)
 {
        for (unsigned i = 0; i < effect_uniforms.size(); ++i) {
@@ -327,10 +327,10 @@ void extract_uniform_declarations(const vector<Uniform<T> > &effect_uniforms,
 }
 
 template<class T>
-void extract_uniform_array_declarations(const vector<Uniform<T> > &effect_uniforms,
+void extract_uniform_array_declarations(const vector<Uniform<T>> &effect_uniforms,
                                         const string &type_specifier,
                                         const string &effect_id,
-                                        vector<Uniform<T> > *phase_uniforms,
+                                        vector<Uniform<T>> *phase_uniforms,
                                         string *glsl_string)
 {
        for (unsigned i = 0; i < effect_uniforms.size(); ++i) {
@@ -347,7 +347,7 @@ void extract_uniform_array_declarations(const vector<Uniform<T> > &effect_unifor
 }
 
 template<class T>
-void collect_uniform_locations(GLuint glsl_program_num, vector<Uniform<T> > *phase_uniforms)
+void collect_uniform_locations(GLuint glsl_program_num, vector<Uniform<T>> *phase_uniforms)
 {
        for (unsigned i = 0; i < phase_uniforms->size(); ++i) {
                Uniform<T> &uniform = (*phase_uniforms)[i];
@@ -361,7 +361,7 @@ void EffectChain::compile_glsl_program(Phase *phase)
 {
        string frag_shader_header;
        if (phase->is_compute_shader) {
-               frag_shader_header = read_file("header.compute");
+               frag_shader_header = read_file("header.comp");
        } else {
                frag_shader_header = read_version_dependent_file("header", "frag");
        }
@@ -781,7 +781,7 @@ void EffectChain::output_dot(const char *filename)
        }
 
        FILE *fp = fopen(filename, "w");
-       if (fp == NULL) {
+       if (fp == nullptr) {
                perror(filename);
                exit(1);
        }
@@ -825,7 +825,7 @@ void EffectChain::output_dot(const char *filename)
 
                if (nodes[i]->outgoing_links.empty() && !nodes[i]->disabled) {
                        // Output node.
-                       vector<string> labels = get_labels_for_edge(nodes[i], NULL);
+                       vector<string> labels = get_labels_for_edge(nodes[i], nullptr);
                        output_dot_edge(fp, from_node_id, "output", labels);
                }
        }
@@ -838,7 +838,7 @@ vector<string> EffectChain::get_labels_for_edge(const Node *from, const Node *to
 {
        vector<string> labels;
 
-       if (to != NULL && to->effect->needs_texture_bounce()) {
+       if (to != nullptr && to->effect->needs_texture_bounce()) {
                labels.push_back("needs_bounce");
        }
        if (from->effect->changes_output_size()) {
@@ -1725,6 +1725,7 @@ void EffectChain::add_dummy_effect_if_needed()
        if (output->effect->is_compute_shader()) {
                Node *dummy = add_node(new IdentityEffect());
                connect_nodes(output, dummy);
+               has_dummy_effect = true;
        }
 }
 
@@ -1820,18 +1821,6 @@ void EffectChain::finalize()
 
 void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height)
 {
-       assert(finalized);
-
-       // This needs to be set anew, in case we are coming from a different context
-       // from when we initialized.
-       check_error();
-       glDisable(GL_DITHER);
-       check_error();
-
-       const bool final_srgb = glIsEnabled(GL_FRAMEBUFFER_SRGB);
-       check_error();
-       bool current_srgb = final_srgb;
-
        // Save original viewport.
        GLuint x = 0, y = 0;
 
@@ -1844,6 +1833,44 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                height = viewport[3];
        }
 
+       render(dest_fbo, {}, x, y, width, height);
+}
+
+void EffectChain::render_to_texture(const vector<DestinationTexture> &destinations, unsigned width, unsigned height)
+{
+       assert(finalized);
+       assert(!destinations.empty());
+
+       if (!has_dummy_effect) {
+               // We don't end in a compute shader, so there's nothing specific for us to do.
+               // Create an FBO for this set of textures, and just render to that.
+               GLuint texnums[4] = { 0, 0, 0, 0 };
+               for (unsigned i = 0; i < destinations.size() && i < 4; ++i) {
+                       texnums[i] = destinations[i].texnum;
+               }
+               GLuint dest_fbo = resource_pool->create_fbo(texnums[0], texnums[1], texnums[2], texnums[3]);
+               render(dest_fbo, {}, 0, 0, width, height);
+               resource_pool->release_fbo(dest_fbo);
+       } else {
+               render((GLuint)-1, destinations, 0, 0, width, height);
+       }
+}
+
+void EffectChain::render(GLuint dest_fbo, const vector<DestinationTexture> &destinations, unsigned x, unsigned y, unsigned width, unsigned height)
+{
+       assert(finalized);
+       assert(destinations.size() <= 1);
+
+       // This needs to be set anew, in case we are coming from a different context
+       // from when we initialized.
+       check_error();
+       glDisable(GL_DITHER);
+       check_error();
+
+       const bool final_srgb = glIsEnabled(GL_FRAMEBUFFER_SRGB);
+       check_error();
+       bool current_srgb = final_srgb;
+
        // Basic state.
        check_error();
        glDisable(GL_BLEND);
@@ -1859,7 +1886,29 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        // since otherwise this turns into an (albeit simple) register allocation problem.
        map<Phase *, GLuint> output_textures;
 
-       for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
+       size_t num_phases = phases.size();
+       if (destinations.empty()) {
+               assert(dest_fbo != (GLuint)-1);
+       } else {
+               assert(has_dummy_effect);
+               assert(x == 0);
+               assert(y == 0);
+               assert(num_phases >= 2);
+               assert(!phases.back()->is_compute_shader);
+               assert(phases.back()->effects.size() == 1);
+               assert(phases.back()->effects[0]->effect->effect_type_id() == "IdentityEffect");
+
+               // We are rendering to a set of textures, so we can run the compute shader
+               // directly and skip the dummy phase.
+               --num_phases;
+
+               // TODO: Support more than one destination.
+               output_textures[phases[num_phases - 1]] = destinations[0].texnum;
+               assert(destinations[0].format == GL_RGBA16F);
+               assert(destinations[0].texnum != 0);
+       }
+
+       for (unsigned phase_num = 0; phase_num < num_phases; ++phase_num) {
                Phase *phase = phases[phase_num];
 
                if (do_phase_timing) {
@@ -1873,23 +1922,27 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                        glBeginQuery(GL_TIME_ELAPSED, timer_query_object);
                        phase->timer_query_objects_running.push_back(timer_query_object);
                }
-               if (phase_num == phases.size() - 1) {
+               bool render_to_texture = true;
+               if (phase_num == num_phases - 1) {
                        // Last phase goes to the output the user specified.
-                       glBindFramebuffer(GL_FRAMEBUFFER, dest_fbo);
-                       check_error();
-                       GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
-                       assert(status == GL_FRAMEBUFFER_COMPLETE);
-                       glViewport(x, y, width, height);
-                       if (dither_effect != NULL) {
+                       if (!phase->is_compute_shader) {
+                               glBindFramebuffer(GL_FRAMEBUFFER, dest_fbo);
+                               check_error();
+                               GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
+                               assert(status == GL_FRAMEBUFFER_COMPLETE);
+                               glViewport(x, y, width, height);
+                               render_to_texture = false;
+                       }
+                       if (dither_effect != nullptr) {
                                CHECK(dither_effect->set_int("output_width", width));
                                CHECK(dither_effect->set_int("output_height", height));
                        }
                }
-               bool last_phase = (phase_num == phases.size() - 1);
 
                // Enable sRGB rendering for intermediates in case we are
                // rendering to an sRGB format.
-               bool needs_srgb = last_phase ? final_srgb : true;
+               // TODO: Support this for compute shaders.
+               bool needs_srgb = render_to_texture ? true : final_srgb;
                if (needs_srgb && !current_srgb) {
                        glEnable(GL_FRAMEBUFFER_SRGB);
                        check_error();
@@ -1900,16 +1953,18 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                        current_srgb = true;
                }
 
-               execute_phase(phase, last_phase, &output_textures, &generated_mipmaps);
+               execute_phase(phase, render_to_texture, &output_textures, &generated_mipmaps);
                if (do_phase_timing) {
                        glEndQuery(GL_TIME_ELAPSED);
                }
        }
 
-       for (map<Phase *, GLuint>::const_iterator texture_it = output_textures.begin();
-            texture_it != output_textures.end();
-            ++texture_it) {
-               resource_pool->release_2d_texture(texture_it->second);
+       // Take out the destination textures from the list of temporary textures to be freed.
+       if (has_dummy_effect && !destinations.empty()) {
+               output_textures.erase(phases[num_phases - 1]);
+       }
+       for (const auto &phase_and_texnum : output_textures) {
+               resource_pool->release_2d_texture(phase_and_texnum.second);
        }
 
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -1926,8 +1981,8 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                // Get back the timer queries.
                for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
                        Phase *phase = phases[phase_num];
-                       for (std::list<GLuint>::iterator timer_it = phase->timer_query_objects_running.begin();
-                            timer_it != phase->timer_query_objects_running.end(); ) {
+                       for (auto timer_it = phase->timer_query_objects_running.cbegin();
+                            timer_it != phase->timer_query_objects_running.cend(); ) {
                                GLint timer_query_object = *timer_it;
                                GLint available;
                                glGetQueryObjectiv(timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
@@ -1982,7 +2037,7 @@ void EffectChain::print_phase_timing()
        printf("Total:   %5.1f ms\n", total_time_ms);
 }
 
-void EffectChain::execute_phase(Phase *phase, bool last_phase,
+void EffectChain::execute_phase(Phase *phase, bool render_to_texture,
                                 map<Phase *, GLuint> *output_textures,
                                 set<Phase *> *generated_mipmaps)
 {
@@ -1990,11 +2045,22 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
 
        // Find a texture for this phase.
        inform_input_sizes(phase);
-       if (!last_phase) {
+       if (render_to_texture) {
                find_output_size(phase);
 
                GLuint tex_num = resource_pool->create_2d_texture(intermediate_format, phase->output_width, phase->output_height);
+               assert(tex_num != 0);
                output_textures->insert(make_pair(phase, tex_num));
+
+               // The output texture needs to have valid state to be written to by a compute shader.
+               if (phase->is_compute_shader) {
+                       glActiveTexture(GL_TEXTURE0);
+                       check_error();
+                       glBindTexture(GL_TEXTURE_2D, (*output_textures)[phase]);
+                       check_error();
+                       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+                       check_error();
+               }
        }
 
        // Set up RTT inputs for this phase.
@@ -2002,6 +2068,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
                glActiveTexture(GL_TEXTURE0 + sampler);
                Phase *input = phase->inputs[sampler];
                input->output_node->bound_sampler_num = sampler;
+               assert(output_textures->count(input));
                glBindTexture(GL_TEXTURE_2D, (*output_textures)[input]);
                check_error();
                if (phase->input_needs_mipmaps && generated_mipmaps->count(input) == 0) {
@@ -2021,6 +2088,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
                // This is currently the only place where we use image units,
                // so we can always use 0.
                phase->outbuf_image_unit = 0;
+               assert(output_textures->count(phase));
                glBindImageTexture(phase->outbuf_image_unit, (*output_textures)[phase], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
                check_error();
                phase->inv_output_size.x = 1.0f / phase->output_width;
@@ -2028,8 +2096,8 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
                phase->output_texcoord_adjust.x = 0.5f / phase->output_width;
                phase->output_texcoord_adjust.y = 0.5f / phase->output_height;
        } else {
-               // (Already set up for us if it is the last phase.)
-               if (!last_phase) {
+               // (Already set up for us if we are outputting to the user's FBO.)
+               if (render_to_texture) {
                        fbo = resource_pool->create_fbo((*output_textures)[phase]);
                        glBindFramebuffer(GL_FRAMEBUFFER, fbo);
                        glViewport(0, 0, phase->output_width, phase->output_height);
@@ -2061,6 +2129,9 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
                // since they can be updated from there.
                setup_uniforms(phase);
                glDispatchCompute(x, y, z);
+               check_error();
+               glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT);
+               check_error();
        } else {
                // Uniforms need to come after set_gl_state(), since they can be updated
                // from there.
@@ -2083,7 +2154,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
 
        resource_pool->unuse_glsl_program(instance_program_num);
 
-       if (!last_phase && !phase->is_compute_shader) {
+       if (render_to_texture && !phase->is_compute_shader) {
                resource_pool->release_fbo(fbo);
        }
 }