Optimize VAO/VBO usage for minimal state changes.
authorSteinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 7 Feb 2016 01:12:14 +0000 (02:12 +0100)
committerSteinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 7 Feb 2016 01:40:06 +0000 (02:40 +0100)
This is similar to what we had earlier to just reuse the VAO,
but now with correct bindings no matter what vertex attributes
are assigned to what index, so that the (new) test passes.

This is actually slightly more efficient than what we had before,
since we don't look up the attributes by text anymore, and don't
reupload the VBO for each frame anymore. In practice, the effects
should be small.

effect_chain.cpp
effect_chain.h
util.cpp
util.h

index 385a87b..b0d4aaf 100644 (file)
@@ -51,6 +51,14 @@ EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *res
        } else {
                owns_resource_pool = false;
        }
+
+       // Generate a VBO with some data in (shared position and texture coordinate data).
+       float vertices[] = {
+               0.0f, 2.0f,
+               0.0f, 0.0f,
+               2.0f, 0.0f
+       };
+       vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
 }
 
 EffectChain::~EffectChain()
@@ -66,6 +74,8 @@ EffectChain::~EffectChain()
        if (owns_resource_pool) {
                delete resource_pool;
        }
+       glDeleteBuffers(1, &vbo);
+       check_error();
 }
 
 Input *EffectChain::add_input(Input *input)
@@ -448,6 +458,14 @@ void EffectChain::compile_glsl_program(Phase *phase)
        }
 
        phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs);
+       GLint position_attribute_index = glGetAttribLocation(phase->glsl_program_num, "position");
+       GLint texcoord_attribute_index = glGetAttribLocation(phase->glsl_program_num, "texcoord");
+       if (position_attribute_index != -1) {
+               phase->attribute_indexes.insert(position_attribute_index);
+       }
+       if (texcoord_attribute_index != -1) {
+               phase->attribute_indexes.insert(texcoord_attribute_index);
+       }
 
        // Collect the resulting location numbers for each uniform.
        collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_sampler2d);
@@ -1699,6 +1717,17 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        glDepthMask(GL_FALSE);
        check_error();
 
+       // Generate a VAO that will be used during the entire execution,
+       // and bind the VBO, since it contains all the data.
+       GLuint vao;
+       glGenVertexArrays(1, &vao);
+       check_error();
+       glBindVertexArray(vao);
+       check_error();
+       glBindBuffer(GL_ARRAY_BUFFER, vbo);
+       check_error();
+       set<GLint> bound_attribute_indices;
+
        set<Phase *> generated_mipmaps;
 
        // We choose the simplest option of having one texture per output,
@@ -1723,12 +1752,19 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                                CHECK(dither_effect->set_int("output_height", height));
                        }
                }
-               execute_phase(phase, phase_num == phases.size() - 1, &output_textures, &generated_mipmaps);
+               execute_phase(phase, phase_num == phases.size() - 1, &bound_attribute_indices, &output_textures, &generated_mipmaps);
                if (do_phase_timing) {
                        glEndQuery(GL_TIME_ELAPSED);
                }
        }
 
+       for (set<GLint>::iterator attr_it = bound_attribute_indices.begin();
+            attr_it != bound_attribute_indices.end();
+            ++attr_it) {
+               glDisableVertexAttribArray(*attr_it);
+               check_error();
+       }
+
        for (map<Phase *, GLuint>::const_iterator texture_it = output_textures.begin();
             texture_it != output_textures.end();
             ++texture_it) {
@@ -1740,6 +1776,13 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        glUseProgram(0);
        check_error();
 
+       glBindBuffer(GL_ARRAY_BUFFER, 0);
+       check_error();
+       glBindVertexArray(0);
+       check_error();
+       glDeleteVertexArrays(1, &vao);
+       check_error();
+
        if (do_phase_timing) {
                // Get back the timer queries.
                for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
@@ -1792,7 +1835,10 @@ void EffectChain::print_phase_timing()
        printf("Total:   %5.1f ms\n", total_time_ms);
 }
 
-void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLuint> *output_textures, set<Phase *> *generated_mipmaps)
+void EffectChain::execute_phase(Phase *phase, bool last_phase,
+                                set<GLint> *bound_attribute_indices,
+                                map<Phase *, GLuint> *output_textures,
+                                set<Phase *> *generated_mipmaps)
 {
        GLuint fbo = 0;
 
@@ -1853,27 +1899,33 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
        // from there.
        setup_uniforms(phase);
 
-       // Now draw!
-       float vertices[] = {
-               0.0f, 2.0f,
-               0.0f, 0.0f,
-               2.0f, 0.0f
-       };
-
-       GLuint vao;
-       glGenVertexArrays(1, &vao);
-       check_error();
-       glBindVertexArray(vao);
-       check_error();
+       // Clean up old attributes if they are no longer needed.
+       for (set<GLint>::iterator attr_it = bound_attribute_indices->begin();
+            attr_it != bound_attribute_indices->end(); ) {
+               if (phase->attribute_indexes.count(*attr_it) == 0) {
+                       glDisableVertexAttribArray(*attr_it);
+                       check_error();
+                       bound_attribute_indices->erase(attr_it++);
+               } else {
+                       ++attr_it;
+               }
+       }
 
-       GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
-       GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
+       // Set up the new attributes, if needed.
+       for (set<GLint>::iterator attr_it = phase->attribute_indexes.begin();
+            attr_it != phase->attribute_indexes.end();
+            ++attr_it) {
+               if (bound_attribute_indices->count(*attr_it) == 0) {
+                       glEnableVertexAttribArray(*attr_it);
+                       check_error();
+                       glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+                       check_error();
+                       bound_attribute_indices->insert(*attr_it);
+               }
+       }
 
        glDrawArrays(GL_TRIANGLES, 0, 3);
        check_error();
-
-       cleanup_vertex_attribute(glsl_program_num, "position", position_vbo);
-       cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo);
        
        glUseProgram(0);
        check_error();
@@ -1886,9 +1938,6 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
        if (!last_phase) {
                resource_pool->release_fbo(fbo);
        }
-
-       glDeleteVertexArrays(1, &vao);
-       check_error();
 }
 
 void EffectChain::setup_uniforms(Phase *phase)
index 5baf9a5..70dc256 100644 (file)
@@ -147,6 +147,11 @@ struct Phase {
        Node *output_node;
 
        GLuint glsl_program_num;  // Owned by the resource_pool.
+
+       // Position and texcoord attribute indexes, although it doesn't matter
+       // which is which, because they contain the same data.
+       std::set<GLint> attribute_indexes;
+
        bool input_needs_mipmaps;
 
        // Inputs are only inputs from other phases (ie., those that come from RTT);
@@ -360,7 +365,10 @@ private:
        Phase *construct_phase(Node *output, std::map<Node *, Phase *> *completed_effects);
 
        // Execute one phase, ie. set up all inputs, effects and outputs, and render the quad.
-       void execute_phase(Phase *phase, bool last_phase, std::map<Phase *, GLuint> *output_textures, std::set<Phase *> *generated_mipmaps);
+       void execute_phase(Phase *phase, bool last_phase,
+                          std::set<GLint> *bound__attribute_indices,
+                          std::map<Phase *, GLuint> *output_textures,
+                          std::set<Phase *> *generated_mipmaps);
 
        // Set up uniforms for one phase. The program must already be bound.
        void setup_uniforms(Phase *phase);
@@ -431,6 +439,7 @@ private:
        unsigned num_dither_bits;
        OutputOrigin output_origin;
        bool finalized;
+       GLuint vbo;  // Contains vertex and texture coordinate data.
 
        ResourcePool *resource_pool;
        bool owns_resource_pool;
index 29428aa..3141f8d 100644 (file)
--- a/util.cpp
+++ b/util.cpp
@@ -272,6 +272,21 @@ template
 void combine_two_samples<fp16_int_t>(float w1, float w2, float pos1, float pos2, float num_subtexels, float inv_num_subtexels,
                                      fp16_int_t *offset, fp16_int_t *total_weight, float *sum_sq_error);
 
+GLuint generate_vbo(GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data)
+{
+       GLuint vbo;
+       glGenBuffers(1, &vbo);
+       check_error();
+       glBindBuffer(GL_ARRAY_BUFFER, vbo);
+       check_error();
+       glBufferData(GL_ARRAY_BUFFER, data_size, data, GL_STATIC_DRAW);
+       check_error();
+       glBindBuffer(GL_ARRAY_BUFFER, 0);
+       check_error();
+
+       return vbo;
+}
+
 GLuint fill_vertex_attribute(GLuint glsl_program_num, const string &attribute_name, GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data)
 {
        int attrib = glGetAttribLocation(glsl_program_num, attribute_name.c_str());
@@ -279,13 +294,10 @@ GLuint fill_vertex_attribute(GLuint glsl_program_num, const string &attribute_na
                return -1;
        }
 
-       GLuint vbo;
-       glGenBuffers(1, &vbo);
-       check_error();
+       GLuint vbo = generate_vbo(size, type, data_size, data);
+
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        check_error();
-       glBufferData(GL_ARRAY_BUFFER, data_size, data, GL_STATIC_DRAW);
-       check_error();
        glEnableVertexAttribArray(attrib);
        check_error();
        glVertexAttribPointer(attrib, size, type, GL_FALSE, 0, BUFFER_OFFSET(0));
diff --git a/util.h b/util.h
index 8cabaf7..f5b5dd3 100644 (file)
--- a/util.h
+++ b/util.h
@@ -74,6 +74,9 @@ template<class DestFloat>
 void combine_two_samples(float w1, float w2, float pos1, float pos2, float num_subtexels, float inv_num_subtexels,
                          DestFloat *offset, DestFloat *total_weight, float *sum_sq_error);
 
+// Create a VBO with the given data. Returns the VBO number.
+GLuint generate_vbo(GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data);
+
 // Create a VBO with the given data, and bind it to the vertex attribute
 // with name <attribute_name>. Returns the VBO number.
 GLuint fill_vertex_attribute(GLuint glsl_program_num, const std::string &attribute_name, GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data);