From: Steinar H. Gunderson Date: Wed, 8 Jan 2014 22:56:43 +0000 (+0100) Subject: Replace glFinish with OpenGL fences. X-Git-Url: https://git.sesse.net/?p=mlt;a=commitdiff_plain;h=c78b8c108bc24df6915352c25f41aa58fecec0e2 Replace glFinish with OpenGL fences. The glFinish after rendering to a texture serves two purposes: First, and maybe most importantly, it makes sure that if we send the texture ID to another thread and try to draw it there, it is actually valid in that context. (If not, the command to allocate it could still be stuck in the queue, or the command to draw the quad to the screen could be queued before the command to actually render the image to the texture.) Second, it makes sure we don't overwhelm the GPU with rendering commands, especially in the readahead thread. GPUs have a long pipeline, and our command buffers are typically very short (we render only one or a few quads per frame), which means that we could queue so much rendering that we couldn't actually get to display the frames, or do compositing and other normal UI tasks. (GPUs are not all that good at scheduling.) However, glFinish() also has an unwanted side effect: Since the CPU waits for the GPU to finish, it means it cannot do anything useful in that period; in particular, it cannot start decoding input video for the next frame, which is very frequently a win. Thus, we replace glFinish() with fences: One that we store on the frame and that the client can wait for, and one that we wait for ourselves before we render the next frame. The first fulfills purpose #1 above (although a client that doesn't render in a different thread can just ignore it), while the second fulfills purpose #2. #2 does reduce the possible pipelining somewhat (compared to not having any fence at all), but it seems that the actual performance loss is very small in practice. 
In any case, this is markedly faster than glFinish -- on my Intel HD 3000, it increases GPU utilization from ~40% to over 80% in a typical transition. Note that this is an API change; a client that wants to send the OpenGL texture number on to a different thread for display, will now need to wait for the fence before it can actually draw using it. --- diff --git a/src/modules/opengl/filter_glsl_manager.cpp b/src/modules/opengl/filter_glsl_manager.cpp index 3cf75683..68ef63dc 100644 --- a/src/modules/opengl/filter_glsl_manager.cpp +++ b/src/modules/opengl/filter_glsl_manager.cpp @@ -51,6 +51,7 @@ GlslManager::GlslManager() , pbo(0) , initEvent(0) , closeEvent(0) + , prev_sync(NULL) { mlt_filter filter = get_filter(); if ( filter ) { @@ -76,6 +77,9 @@ GlslManager::~GlslManager() // delete (glsl_texture) texture_list.pop_back(); delete initEvent; delete closeEvent; + if (prev_sync != NULL) { + glDeleteSync( prev_sync ); + } } GlslManager* GlslManager::get_instance() @@ -194,6 +198,11 @@ void GlslManager::release_texture(glsl_texture texture) texture->used = 0; } +void GlslManager::delete_sync(GLsync sync) +{ + glDeleteSync(sync); +} + glsl_pbo GlslManager::get_pbo(int size) { lock(); @@ -408,9 +417,18 @@ int GlslManager::render_frame_texture(mlt_service service, mlt_frame frame, int glBindFramebuffer( GL_FRAMEBUFFER, 0 ); check_error(); + // Make sure we never have more than one frame pending at any time. + // This ensures we do not swamp the GPU with so much work + // that we cannot actually display the frames we generate. 
+ if (prev_sync != NULL) { + glFlush(); + glClientWaitSync( prev_sync, 0, GL_TIMEOUT_IGNORED ); + glDeleteSync( prev_sync ); + } render_fbo( service, chain, fbo->fbo, width, height ); + prev_sync = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + GLsync sync = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); - glFinish(); check_error(); glBindFramebuffer( GL_FRAMEBUFFER, 0 ); check_error(); @@ -420,6 +438,8 @@ int GlslManager::render_frame_texture(mlt_service service, mlt_frame frame, int mlt_frame_set_image( frame, *image, 0, NULL ); mlt_properties_set_data( MLT_FRAME_PROPERTIES(frame), "movit.convert.texture", texture, 0, (mlt_destructor) GlslManager::release_texture, NULL ); + mlt_properties_set_data( MLT_FRAME_PROPERTIES(frame), "movit.convert.fence", sync, 0, + (mlt_destructor) GlslManager::delete_sync, NULL ); return 0; } diff --git a/src/modules/opengl/filter_glsl_manager.h b/src/modules/opengl/filter_glsl_manager.h index 1d4a1653..5263e5cb 100644 --- a/src/modules/opengl/filter_glsl_manager.h +++ b/src/modules/opengl/filter_glsl_manager.h @@ -79,6 +79,7 @@ public: static void release_fbo(glsl_fbo); glsl_texture get_texture(int width, int height, GLint internal_format); static void release_texture(glsl_texture); + static void delete_sync(GLsync sync); glsl_pbo get_pbo(int size); void cleanupContext(); @@ -106,6 +107,7 @@ private: glsl_pbo pbo; Mlt::Event* initEvent; Mlt::Event* closeEvent; + GLsync prev_sync; }; #endif // GLSL_MANAGER_H