From 2d8043bb837b45c9ae509450b3e1b1eb545e44b9 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 5 Jul 2017 01:06:12 +0200 Subject: [PATCH] Cache VAOs between runs. NVIDIA's drivers (at least the Linux drivers) seem to have a bug where, if you call glVertexAttribPointer() in one thread together with some specific activity in another thread (at least another glVertexAttribPointer() will do, but I've also seen it during glXSwapBuffers()), it will try to free a non-malloced pointer, crashing everything. This happens very rarely, but often enough that it's a real problem for Nageru. We solve it by simply pre-creating all needed VAOs ahead-of-time and caching them, which makes us call glVertexAttribPointer() a _lot_ less often (basically never, instead of several times per frame). This might actually be a bit slower than the old code (I haven't tested), but at least it works around the driver bug. ABI break, unfortunately, but no API break. --- effect_chain.cpp | 44 +++-------------------- effect_chain.h | 1 - resource_pool.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++++-- resource_pool.h | 32 +++++++++++++++-- version.h | 2 +- 5 files changed, 122 insertions(+), 46 deletions(-) diff --git a/effect_chain.cpp b/effect_chain.cpp index efd4abc..c46e60d 100644 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@ -1789,17 +1789,6 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height glDepthMask(GL_FALSE); check_error(); - // Generate a VAO that will be used during the entire execution, - // and bind the VBO, since it contains all the data. - GLuint vao; - glGenVertexArrays(1, &vao); - check_error(); - glBindVertexArray(vao); - check_error(); - glBindBuffer(GL_ARRAY_BUFFER, vbo); - check_error(); - set bound_attribute_indices; - set generated_mipmaps; // We choose the simplest option of having one texture per output, @@ -1847,7 +1836,7 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height current_srgb = true; } - execute_phase(phase, last_phase, &bound_attribute_indices, &output_textures, &generated_mipmaps); + execute_phase(phase, last_phase, &output_textures, &generated_mipmaps); if (do_phase_timing) { glEndQuery(GL_TIME_ELAPSED); } @@ -1868,8 +1857,6 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height check_error(); glBindVertexArray(0); check_error(); - glDeleteVertexArrays(1, &vao); - check_error(); if (do_phase_timing) { // Get back the timer queries. @@ -1932,7 +1919,6 @@ void EffectChain::print_phase_timing() } void EffectChain::execute_phase(Phase *phase, bool last_phase, - set *bound_attribute_indices, map *output_textures, set *generated_mipmaps) { @@ -1993,30 +1979,9 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, // from there. setup_uniforms(phase); - // Clean up old attributes if they are no longer needed. - for (set::iterator attr_it = bound_attribute_indices->begin(); - attr_it != bound_attribute_indices->end(); ) { - if (phase->attribute_indexes.count(*attr_it) == 0) { - glDisableVertexAttribArray(*attr_it); - check_error(); - bound_attribute_indices->erase(attr_it++); - } else { - ++attr_it; - } - } - - // Set up the new attributes, if needed. - for (set::iterator attr_it = phase->attribute_indexes.begin(); - attr_it != phase->attribute_indexes.end(); - ++attr_it) { - if (bound_attribute_indices->count(*attr_it) == 0) { - glEnableVertexAttribArray(*attr_it); - check_error(); - glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - check_error(); - bound_attribute_indices->insert(*attr_it); - } - } + // Bind the vertex data. + GLuint vao = resource_pool->create_vec2_vao(phase->attribute_indexes, vbo); + glBindVertexArray(vao); glDrawArrays(GL_TRIANGLES, 0, 3); check_error(); @@ -2027,6 +1992,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, } resource_pool->unuse_glsl_program(instance_program_num); + resource_pool->release_vec2_vao(vao); if (!last_phase) { resource_pool->release_fbo(fbo); diff --git a/effect_chain.h b/effect_chain.h index 6a65399..cd0a19f 100644 --- a/effect_chain.h +++ b/effect_chain.h @@ -443,7 +443,6 @@ private: // Execute one phase, ie. set up all inputs, effects and outputs, and render the quad. void execute_phase(Phase *phase, bool last_phase, - std::set *bound__attribute_indices, std::map *output_textures, std::set *generated_mipmaps); diff --git a/resource_pool.cpp b/resource_pool.cpp index 5b04999..6228fa2 100644 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@ -18,10 +18,12 @@ namespace movit { ResourcePool::ResourcePool(size_t program_freelist_max_length, size_t texture_freelist_max_bytes, - size_t fbo_freelist_max_length) + size_t fbo_freelist_max_length, + size_t vao_freelist_max_length) : program_freelist_max_length(program_freelist_max_length), texture_freelist_max_bytes(texture_freelist_max_bytes), fbo_freelist_max_length(fbo_freelist_max_length), + vao_freelist_max_length(vao_freelist_max_length), texture_freelist_bytes(0) { pthread_mutex_init(&lock, NULL); @@ -527,14 +529,83 @@ void ResourcePool::release_fbo(GLuint fbo_num) pthread_mutex_unlock(&lock); } +GLuint ResourcePool::create_vec2_vao(const set &attribute_indices, GLuint vbo_num) +{ + void *context = get_gl_context_identifier(); + + pthread_mutex_lock(&lock); + if (vao_freelist.count(context) != 0) { + // See if there's a VAO the freelist we can use. + list::iterator end = vao_freelist[context].end(); + for (list::iterator freelist_it = vao_freelist[context].begin(); + freelist_it != end; ++freelist_it) { + VAOFormatIterator vao_it = *freelist_it; + if (vao_it->second.vbo_num == vbo_num && + vao_it->second.attribute_indices == attribute_indices) { + vao_freelist[context].erase(freelist_it); + pthread_mutex_unlock(&lock); + return vao_it->second.vao_num; + } + } + } + pthread_mutex_unlock(&lock); + + // Create a new one. + VAO vao_format; + vao_format.attribute_indices = attribute_indices; + vao_format.vbo_num = vbo_num; + + glGenVertexArrays(1, &vao_format.vao_num); + check_error(); + glBindVertexArray(vao_format.vao_num); + check_error(); + glBindBuffer(GL_ARRAY_BUFFER, vbo_num); + check_error(); + + for (set::const_iterator attr_it = attribute_indices.begin(); attr_it != attribute_indices.end(); ++attr_it) { + glEnableVertexAttribArray(*attr_it); + check_error(); + glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); + check_error(); + } + + glBindVertexArray(0); + check_error(); + glBindBuffer(GL_ARRAY_BUFFER, 0); + check_error(); + + pair key(context, vao_format.vao_num); + assert(vao_formats.count(key) == 0); + vao_formats.insert(make_pair(key, vao_format)); + + pthread_mutex_unlock(&lock); + return vao_format.vao_num; +} + +void ResourcePool::release_vec2_vao(GLuint vao_num) +{ + void *context = get_gl_context_identifier(); + + pthread_mutex_lock(&lock); + VAOFormatIterator vao_it = vao_formats.find(make_pair(context, vao_num)); + assert(vao_it != vao_formats.end()); + vao_freelist[context].push_front(vao_it); + + shrink_vao_freelist(context, vao_freelist_max_length); + pthread_mutex_unlock(&lock); +} + void ResourcePool::clean_context() { void *context = get_gl_context_identifier(); - // Currently, we only need to worry about FBOs, as they are the only - // non-shareable resource we hold. + // Currently, we only need to worry about FBOs and VAOs, as they are the only + // non-shareable resources we hold. shrink_fbo_freelist(context, 0); fbo_freelist.erase(context); + + shrink_vao_freelist(context, 0); + vao_freelist.erase(context); } void ResourcePool::cleanup_unlinked_fbos(void *context) @@ -574,6 +645,18 @@ void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length) } } +void ResourcePool::shrink_vao_freelist(void *context, size_t max_length) +{ + list &freelist = vao_freelist[context]; + while (freelist.size() > max_length) { + VAOFormatIterator free_vao_it = freelist.back(); + glDeleteVertexArrays(1, &free_vao_it->second.vao_num); + check_error(); + vao_formats.erase(free_vao_it); + freelist.pop_back(); + } +} + size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format) { size_t bytes_per_pixel; diff --git a/resource_pool.h b/resource_pool.h index 8503b29..f919536 100644 --- a/resource_pool.h +++ b/resource_pool.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,8 @@ public: // twice this estimate or more. ResourcePool(size_t program_freelist_max_length = 100, size_t texture_freelist_max_bytes = 100 << 20, // 100 MB. - size_t fbo_freelist_max_length = 100); // Per context. + size_t fbo_freelist_max_length = 100, // Per context. + size_t vao_freelist_max_length = 100); // Per context. ~ResourcePool(); // All remaining functions are intended for calls from EffectChain only. @@ -104,6 +106,19 @@ public: GLuint texture3_num = 0); void release_fbo(GLuint fbo_num); + // Create a VAO of a very specific form: All the given attribute indices + // are bound to start of the given VBO and contain two-component floats. + // Keeps ownership of the VAO; you must call release_vec2_vao() of deleting + // it when you no longer want it. VAOs are not sharable across contexts. + // + // These are not cached primarily for performance, but rather to work + // around an NVIDIA driver bug where glVertexAttribPointer() is thread-hostile + // (ie., simultaneous GL work in unrelated contexts can cause the driver + // to free() memory that was never malloc()-ed). + GLuint create_vec2_vao(const std::set &attribute_indices, + GLuint vbo_num); + void release_vec2_vao(const GLuint vao_num); + // Informs the ResourcePool that the current context is going away soon, // and that any resources held for it in the freelist should be deleted. // @@ -123,6 +138,9 @@ private: // is no more than elements long. void shrink_fbo_freelist(void *context, size_t max_length); + // Same, for VAOs. + void shrink_vao_freelist(void *context, size_t max_length); + // Link the given vertex and fragment shaders into a full GLSL program. // See compile_glsl_program() for explanation of . static GLuint link_program(GLuint vs_obj, @@ -132,7 +150,7 @@ private: // Protects all the other elements in the class. pthread_mutex_t lock; - size_t program_freelist_max_length, texture_freelist_max_bytes, fbo_freelist_max_length; + size_t program_freelist_max_length, texture_freelist_max_bytes, fbo_freelist_max_length, vao_freelist_max_length; // A mapping from vertex/fragment shader source strings to compiled program number. std::map, GLuint> programs; @@ -204,6 +222,16 @@ private: // We store iterators directly into for efficiency. std::map > fbo_freelist; + // Very similar, for VAOs. + struct VAO { + GLuint vao_num; + std::set attribute_indices; + GLuint vbo_num; + }; + std::map, VAO> vao_formats; + typedef std::map, VAO>::iterator VAOFormatIterator; + std::map > vao_freelist; + // See the caveats at the constructor. static size_t estimate_texture_size(const Texture2D &texture_format); }; diff --git a/version.h b/version.h index 9935982..12f73f0 100644 --- a/version.h +++ b/version.h @@ -5,6 +5,6 @@ // changes, even within git versions. There is no specific version // documentation outside the regular changelogs, though. -#define MOVIT_VERSION 29 +#define MOVIT_VERSION 30 #endif // !defined(_MOVIT_VERSION_H) -- 2.39.2