Cache VAOs between runs.
authorSteinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 4 Jul 2017 23:06:12 +0000 (01:06 +0200)
committerSteinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 4 Jul 2017 23:10:57 +0000 (01:10 +0200)
NVIDIA's drivers (at least the Linux drivers) seem to have a bug
where, if you call glVertexAttribPointer() in one thread together
with some specific activity in another thread (at least another
glVertexAttribPointer() will do, but I've also seen it during
glXSwapBuffers()), it will try to free a non-malloced pointer,
crashing everything. This happens very rarely, but often enough
that it's a real problem for Nageru.

We solve it by simply pre-creating all needed VAOs ahead-of-time
and caching them, which makes us call glVertexAttribPointer()
a _lot_ less often (basically never, instead of several times
per frame). This might actually be a bit slower than the old code
(I haven't tested), but at least it works around the driver bug.

ABI break, unfortunately, but no API break.

effect_chain.cpp
effect_chain.h
resource_pool.cpp
resource_pool.h
version.h

index efd4abc..c46e60d 100644 (file)
@@ -1789,17 +1789,6 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        glDepthMask(GL_FALSE);
        check_error();
 
-       // Generate a VAO that will be used during the entire execution,
-       // and bind the VBO, since it contains all the data.
-       GLuint vao;
-       glGenVertexArrays(1, &vao);
-       check_error();
-       glBindVertexArray(vao);
-       check_error();
-       glBindBuffer(GL_ARRAY_BUFFER, vbo);
-       check_error();
-       set<GLint> bound_attribute_indices;
-
        set<Phase *> generated_mipmaps;
 
        // We choose the simplest option of having one texture per output,
@@ -1847,7 +1836,7 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                        current_srgb = true;
                }
 
-               execute_phase(phase, last_phase, &bound_attribute_indices, &output_textures, &generated_mipmaps);
+               execute_phase(phase, last_phase, &output_textures, &generated_mipmaps);
                if (do_phase_timing) {
                        glEndQuery(GL_TIME_ELAPSED);
                }
@@ -1868,8 +1857,6 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        check_error();
        glBindVertexArray(0);
        check_error();
-       glDeleteVertexArrays(1, &vao);
-       check_error();
 
        if (do_phase_timing) {
                // Get back the timer queries.
@@ -1932,7 +1919,6 @@ void EffectChain::print_phase_timing()
 }
 
 void EffectChain::execute_phase(Phase *phase, bool last_phase,
-                                set<GLint> *bound_attribute_indices,
                                 map<Phase *, GLuint> *output_textures,
                                 set<Phase *> *generated_mipmaps)
 {
@@ -1993,30 +1979,9 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
        // from there.
        setup_uniforms(phase);
 
-       // Clean up old attributes if they are no longer needed.
-       for (set<GLint>::iterator attr_it = bound_attribute_indices->begin();
-            attr_it != bound_attribute_indices->end(); ) {
-               if (phase->attribute_indexes.count(*attr_it) == 0) {
-                       glDisableVertexAttribArray(*attr_it);
-                       check_error();
-                       bound_attribute_indices->erase(attr_it++);
-               } else {
-                       ++attr_it;
-               }
-       }
-
-       // Set up the new attributes, if needed.
-       for (set<GLint>::iterator attr_it = phase->attribute_indexes.begin();
-            attr_it != phase->attribute_indexes.end();
-            ++attr_it) {
-               if (bound_attribute_indices->count(*attr_it) == 0) {
-                       glEnableVertexAttribArray(*attr_it);
-                       check_error();
-                       glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
-                       check_error();
-                       bound_attribute_indices->insert(*attr_it);
-               }
-       }
+       // Bind the vertex data.
+       GLuint vao = resource_pool->create_vec2_vao(phase->attribute_indexes, vbo);
+       glBindVertexArray(vao);
 
        glDrawArrays(GL_TRIANGLES, 0, 3);
        check_error();
@@ -2027,6 +1992,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
        }
 
        resource_pool->unuse_glsl_program(instance_program_num);
+       resource_pool->release_vec2_vao(vao);
 
        if (!last_phase) {
                resource_pool->release_fbo(fbo);
index 6a65399..cd0a19f 100644 (file)
@@ -443,7 +443,6 @@ private:
 
        // Execute one phase, ie. set up all inputs, effects and outputs, and render the quad.
        void execute_phase(Phase *phase, bool last_phase,
-                          std::set<GLint> *bound__attribute_indices,
                           std::map<Phase *, GLuint> *output_textures,
                           std::set<Phase *> *generated_mipmaps);
 
index 5b04999..6228fa2 100644 (file)
@@ -18,10 +18,12 @@ namespace movit {
 
 ResourcePool::ResourcePool(size_t program_freelist_max_length,
                            size_t texture_freelist_max_bytes,
-                           size_t fbo_freelist_max_length)
+                           size_t fbo_freelist_max_length,
+                           size_t vao_freelist_max_length)
        : program_freelist_max_length(program_freelist_max_length),
          texture_freelist_max_bytes(texture_freelist_max_bytes),
          fbo_freelist_max_length(fbo_freelist_max_length),
+         vao_freelist_max_length(vao_freelist_max_length),
          texture_freelist_bytes(0)
 {
        pthread_mutex_init(&lock, NULL);
@@ -527,14 +529,83 @@ void ResourcePool::release_fbo(GLuint fbo_num)
        pthread_mutex_unlock(&lock);
 }
 
+GLuint ResourcePool::create_vec2_vao(const set<GLint> &attribute_indices, GLuint vbo_num)
+{
+       void *context = get_gl_context_identifier();
+
+       pthread_mutex_lock(&lock);
+       if (vao_freelist.count(context) != 0) {
+               // See if there's a VAO the freelist we can use.
+               list<VAOFormatIterator>::iterator end = vao_freelist[context].end();
+               for (list<VAOFormatIterator>::iterator freelist_it = vao_freelist[context].begin();
+                    freelist_it != end; ++freelist_it) {
+                       VAOFormatIterator vao_it = *freelist_it;
+                       if (vao_it->second.vbo_num == vbo_num &&
+                           vao_it->second.attribute_indices == attribute_indices) {
+                               vao_freelist[context].erase(freelist_it);
+                               pthread_mutex_unlock(&lock);
+                               return vao_it->second.vao_num;
+                       }
+               }
+       }
+       pthread_mutex_unlock(&lock);
+
+       // Create a new one.
+       VAO vao_format;
+       vao_format.attribute_indices = attribute_indices;
+       vao_format.vbo_num = vbo_num;
+
+       glGenVertexArrays(1, &vao_format.vao_num);
+       check_error();
+       glBindVertexArray(vao_format.vao_num);
+       check_error();
+       glBindBuffer(GL_ARRAY_BUFFER, vbo_num);
+       check_error();
+
+       for (set<GLint>::const_iterator attr_it = attribute_indices.begin(); attr_it != attribute_indices.end(); ++attr_it) {
+               glEnableVertexAttribArray(*attr_it);
+               check_error();
+               glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+               check_error();
+       }
+
+       glBindVertexArray(0);
+       check_error();
+       glBindBuffer(GL_ARRAY_BUFFER, 0);
+       check_error();
+
+       pair<void *, GLuint> key(context, vao_format.vao_num);
+       assert(vao_formats.count(key) == 0);
+       vao_formats.insert(make_pair(key, vao_format));
+
+       pthread_mutex_unlock(&lock);
+       return vao_format.vao_num;
+}
+
+void ResourcePool::release_vec2_vao(GLuint vao_num)
+{
+       void *context = get_gl_context_identifier();
+
+       pthread_mutex_lock(&lock);
+       VAOFormatIterator vao_it = vao_formats.find(make_pair(context, vao_num));
+       assert(vao_it != vao_formats.end());
+       vao_freelist[context].push_front(vao_it);
+
+       shrink_vao_freelist(context, vao_freelist_max_length);
+       pthread_mutex_unlock(&lock);
+}
+
 void ResourcePool::clean_context()
 {
        void *context = get_gl_context_identifier();
 
-       // Currently, we only need to worry about FBOs, as they are the only
-       // non-shareable resource we hold.
+       // Currently, we only need to worry about FBOs and VAOs, as they are the only
+       // non-shareable resources we hold.
        shrink_fbo_freelist(context, 0);
        fbo_freelist.erase(context);
+
+       shrink_vao_freelist(context, 0);
+       vao_freelist.erase(context);
 }
 
 void ResourcePool::cleanup_unlinked_fbos(void *context)
@@ -574,6 +645,18 @@ void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length)
        }
 }
 
+void ResourcePool::shrink_vao_freelist(void *context, size_t max_length)
+{
+       list<VAOFormatIterator> &freelist = vao_freelist[context];
+       while (freelist.size() > max_length) {
+               VAOFormatIterator free_vao_it = freelist.back();
+               glDeleteVertexArrays(1, &free_vao_it->second.vao_num);
+               check_error();
+               vao_formats.erase(free_vao_it);
+               freelist.pop_back();
+       }
+}
+
 size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
 {
        size_t bytes_per_pixel;
index 8503b29..f919536 100644 (file)
@@ -27,6 +27,7 @@
 #include <stddef.h>
 #include <list>
 #include <map>
+#include <set>
 #include <stack>
 #include <string>
 #include <utility>
@@ -49,7 +50,8 @@ public:
        // twice this estimate or more.
        ResourcePool(size_t program_freelist_max_length = 100,
                     size_t texture_freelist_max_bytes = 100 << 20,  // 100 MB.
-                    size_t fbo_freelist_max_length = 100);  // Per context.
+                    size_t fbo_freelist_max_length = 100,  // Per context.
+                    size_t vao_freelist_max_length = 100);  // Per context.
        ~ResourcePool();
 
        // All remaining functions are intended for calls from EffectChain only.
@@ -104,6 +106,19 @@ public:
                          GLuint texture3_num = 0);
        void release_fbo(GLuint fbo_num);
 
+       // Create a VAO of a very specific form: All the given attribute indices
+       // are bound to start of the  given VBO and contain two-component floats.
+       // Keeps ownership of the VAO; you must call release_vec2_vao() of deleting
+       // it when you no longer want it. VAOs are not sharable across contexts.
+       //
+       // These are not cached primarily for performance, but rather to work
+       // around an NVIDIA driver bug where glVertexAttribPointer() is thread-hostile
+       // (ie., simultaneous GL work in unrelated contexts can cause the driver
+       // to free() memory that was never malloc()-ed).
+       GLuint create_vec2_vao(const std::set<GLint> &attribute_indices,
+                              GLuint vbo_num);
+       void release_vec2_vao(const GLuint vao_num);
+
        // Informs the ResourcePool that the current context is going away soon,
        // and that any resources held for it in the freelist should be deleted.
        //
@@ -123,6 +138,9 @@ private:
        // is no more than <max_length> elements long.
        void shrink_fbo_freelist(void *context, size_t max_length);
 
+       // Same, for VAOs.
+       void shrink_vao_freelist(void *context, size_t max_length);
+
        // Link the given vertex and fragment shaders into a full GLSL program.
        // See compile_glsl_program() for explanation of <fragment_shader_outputs>.
        static GLuint link_program(GLuint vs_obj,
@@ -132,7 +150,7 @@ private:
        // Protects all the other elements in the class.
        pthread_mutex_t lock;
 
-       size_t program_freelist_max_length, texture_freelist_max_bytes, fbo_freelist_max_length;
+       size_t program_freelist_max_length, texture_freelist_max_bytes, fbo_freelist_max_length, vao_freelist_max_length;
                
        // A mapping from vertex/fragment shader source strings to compiled program number.
        std::map<std::pair<std::string, std::string>, GLuint> programs;
@@ -204,6 +222,16 @@ private:
        // We store iterators directly into <fbo_format> for efficiency.
        std::map<void *, std::list<FBOFormatIterator> > fbo_freelist;
 
+       // Very similar, for VAOs.
+       struct VAO {
+               GLuint vao_num;
+               std::set<GLint> attribute_indices;
+               GLuint vbo_num;
+       };
+       std::map<std::pair<void *, GLuint>, VAO> vao_formats;
+       typedef std::map<std::pair<void *, GLuint>, VAO>::iterator VAOFormatIterator;
+       std::map<void *, std::list<VAOFormatIterator> > vao_freelist;
+
        // See the caveats at the constructor.
        static size_t estimate_texture_size(const Texture2D &texture_format);
 };
index 9935982..12f73f0 100644 (file)
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 29
+#define MOVIT_VERSION 30
 
 #endif // !defined(_MOVIT_VERSION_H)