]> git.sesse.net Git - casparcg/commitdiff
2.0. - Added GPU fences for read-back which will avoid blocking the rendering thread...
authorRonag <Ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 6 Aug 2011 18:20:11 +0000 (18:20 +0000)
committerRonag <Ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 6 Aug 2011 18:20:11 +0000 (18:20 +0000)
     - The GPU fence implementation is only supported by NVIDIA cards, which will cause incompatibility with AMD cards; this should be fixed in the future.
     - Fixed performance bug which caused the rendering thread to wait unbuffered for rendering to finish. This basically caused the rendering thread to be unnecessarily blocked. Detected thanks to fences.

git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.0.0.2@1070 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d

common/gl/gl_check.h
core/consumer/output.cpp
core/mixer/gpu/device_buffer.cpp
core/mixer/gpu/host_buffer.cpp
core/mixer/gpu/host_buffer.h
core/mixer/read_frame.cpp
core/mixer/read_frame.h

index 177ecbd65619aa7d2d06a55accbd06c0245a8ed6..fe573e42f0772cd412c46bfc763286d2736a5cc5 100644 (file)
@@ -49,6 +49,14 @@ void SMFL_GLCheckError(const std::string& expr, const std::string& File, unsigne
                (expr);  \\r
                caspar::gl::SMFL_GLCheckError(CASPAR_GL_EXPR_STR(expr), __FILE__, __LINE__);\\r
        }while(0);\r
+\r
+#define GL2(expr) \\r
+       [&]() -> decltype(expr)\\r
+       {\\r
+               auto ret = (expr); \\r
+               caspar::gl::SMFL_GLCheckError(CASPAR_GL_EXPR_STR(expr), __FILE__, __LINE__);\\r
+               return ret;\\r
+       }()\r
 //#else\r
 //#define GL(expr) expr\r
 //#endif\r
index e30382a97c6040832d5f92d92131c76c37338dcb..48b8ea2e0606bc7274141dba8bb21331f35b6921 100644 (file)
@@ -134,7 +134,7 @@ public:
 \r
                                        auto frame = consumer->key_only() ? key : fill;\r
 \r
-                                       if(static_cast<size_t>(frame->image_data().size()) == consumer->get_video_format_desc().size)\r
+                                       if(frame->image_size() == consumer->get_video_format_desc().size)\r
                                                consumer->send(frame);\r
 \r
                                        ++it;\r
index fa4699b4742f97ea28d53f7cc3fc4a01d35f9a54..891fd6faa17fd5d87aa83e7f38307510abf4bd55 100644 (file)
@@ -103,6 +103,8 @@ public:
                GL(glReadPixels(0, 0, width_, height_, FORMAT[stride_], GL_UNSIGNED_BYTE, NULL));\r
                target.unbind();\r
                GL(glBindTexture(GL_TEXTURE_2D, 0));\r
+               target.fence_set();\r
+               glFlush();\r
        }\r
 \r
        void attach(int index)\r
index 05b698498af9541989ea7dde3c2f4132a387311e..943014bc6a675decb7d3868da0b4cbecf456043e 100644 (file)
@@ -28,6 +28,7 @@ namespace caspar { namespace core {
 struct host_buffer::implementation : boost::noncopyable\r
 {      \r
        GLuint pbo_;\r
+       GLuint fence_;\r
 \r
        const size_t size_;\r
 \r
@@ -42,6 +43,7 @@ public:
                , pbo_(0)\r
                , target_(usage == write_only ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER)\r
                , usage_(usage == write_only ? GL_STREAM_DRAW : GL_STREAM_READ)\r
+               , fence_(0)\r
        {\r
                GL(glGenBuffers(1, &pbo_));\r
                GL(glBindBuffer(target_, pbo_));\r
@@ -59,6 +61,9 @@ public:
        {\r
                try\r
                {\r
+                       if(fence_)\r
+                               glDeleteFencesNV(1, &fence_);\r
+\r
                        GL(glDeleteBuffers(1, &pbo_));\r
                }\r
                catch(...)\r
@@ -76,7 +81,7 @@ public:
                        GL(glBufferData(target_, size_, NULL, usage_)); // Notify OpenGL that we don't care about previous data.\r
                \r
                GL(glBindBuffer(target_, pbo_));\r
-               data_ = glMapBuffer(target_, usage_ == GL_STREAM_DRAW ? GL_WRITE_ONLY : GL_READ_ONLY);  \r
+               data_ = GL2(glMapBuffer(target_, usage_ == GL_STREAM_DRAW ? GL_WRITE_ONLY : GL_READ_ONLY));  \r
                GL(glBindBuffer(target_, 0)); \r
                if(!data_)\r
                        BOOST_THROW_EXCEPTION(invalid_operation() << msg_info("Failed to map target_ OpenGL Pixel Buffer Object."));\r
@@ -102,6 +107,20 @@ public:
        {\r
                GL(glBindBuffer(target_, 0));\r
        }\r
+       \r
+       void fence_set()\r
+       {\r
+               if(fence_)\r
+                       glDeleteFencesNV(1, &fence_);\r
+                       \r
+               GL(glGenFencesNV(1, &fence_));\r
+               GL(glSetFenceNV(fence_, GL_ALL_COMPLETED_NV));\r
+       }\r
+\r
+       bool fence_rdy() const\r
+       {\r
+               return GL2(glTestFenceNV(fence_)) != GL_FALSE;\r
+       }\r
 };\r
 \r
 host_buffer::host_buffer(size_t size, usage_t usage) : impl_(new implementation(size, usage)){}\r
@@ -111,6 +130,8 @@ void host_buffer::map(){impl_->map();}
 void host_buffer::unmap(){impl_->unmap();}\r
 void host_buffer::bind(){impl_->bind();}\r
 void host_buffer::unbind(){impl_->unbind();}\r
+void host_buffer::fence_set(){impl_->fence_set();}\r
+bool host_buffer::fence_rdy() const{return impl_->fence_rdy();}\r
 \r
 size_t host_buffer::size() const { return impl_->size_; }\r
 \r
index 3fba42e8df6a8e0ffa6f7d011e1dbdb913d53c74..2e1484545e6bd13fe005b73a4fda71794dd0cc62 100644 (file)
@@ -43,6 +43,9 @@ public:
 \r
        void map();\r
        void unmap();\r
+\r
+       void fence_set();\r
+       bool fence_rdy() const;\r
 private:\r
        friend class ogl_device;\r
        host_buffer(size_t size, usage_t usage);\r
index 002e1458dcd7ee6deb585127c4a8a82454144b6e..e096c9e76351722ddc35ca2211c1b179b657508f 100644 (file)
@@ -42,6 +42,21 @@ public:
        {\r
                if(!image_data_->data())\r
                {\r
+                       auto fence_check = [=]{return image_data_->fence_rdy();};\r
+\r
+                       int delay = 0;\r
+                       if(!ogl_.invoke(fence_check, high_priority))\r
+                       {\r
+                               while(!ogl_.invoke(fence_check, normal_priority))\r
+                               {\r
+                                       delay += 3;\r
+                                       Sleep(3);\r
+                               }\r
+                       }\r
+\r
+                       if(delay > 0)\r
+                               CASPAR_LOG(warning) << L" Performance warning. GPU was not ready during requested host read-back. Delayed by atleast: " << delay << L" ms.";\r
+\r
                        ogl_.invoke([=]\r
                        {\r
                                image_data_->map();\r
@@ -70,6 +85,8 @@ const boost::iterator_range<const int16_t*> read_frame::audio_data()
        return impl_ ? impl_->audio_data() : boost::iterator_range<const int16_t*>();\r
 }\r
 \r
+size_t read_frame::image_size() const{return impl_->image_data_->size();}\r
+\r
 //#include <tbb/scalable_allocator.h>\r
 //#include <tbb/parallel_for.h>\r
 //#include <tbb/enumerable_thread_specific.h>\r
index 35ae10432fad495e5a997b8318fd722b734dd62f..1b2999952e8a4a6d7b936fa718d2b327f9c1ad5c 100644 (file)
@@ -41,6 +41,8 @@ public:
 \r
        virtual const boost::iterator_range<const uint8_t*> image_data();\r
        virtual const boost::iterator_range<const int16_t*> audio_data();\r
+\r
+       virtual size_t image_size() const;\r
                \r
 private:\r
        struct implementation;\r