git.sesse.net Git - casparcg/commitdiff
2.1.0: image_mixer: Optimized transfers.
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 4 Feb 2012 22:42:17 +0000 (22:42 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 4 Feb 2012 22:42:17 +0000 (22:42 +0000)
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.1.0@2246 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d

accelerator/image/image_kernel.cpp
accelerator/image/image_kernel.h
accelerator/image/image_mixer.cpp
accelerator/ogl/context.cpp
accelerator/ogl/context.h
common/concurrency/executor.h
modules/flash/producer/flash_producer.cpp
modules/screen/consumer/screen_consumer.cpp

index 179d6add43d7694b59e72b647be64ead4711c109..eb2ce3dbb8161b94909aaa49f8109d153d2c8931 100644 (file)
@@ -69,6 +69,8 @@ struct image_kernel::impl : boost::noncopyable
        void draw(draw_params&& params)\r
        {\r
                static const double epsilon = 0.001;\r
+               \r
+               ogl_->yield();\r
 \r
                CASPAR_ASSERT(params.pix_desc.planes.size() == params.textures.size());\r
 \r
@@ -231,5 +233,9 @@ void image_kernel::draw(draw_params&& params)
 {\r
        impl_->draw(std::move(params));\r
 }\r
+bool image_kernel::has_blend_modes() const\r
+{\r
+       return impl_->blend_modes_;\r
+}\r
 \r
 }}}
\ No newline at end of file
index 24dc2d7e36a02b08b0eed2b63758492f62c052f4..771adebffd986f2cadc482b6bad73d741c6f8215 100644 (file)
@@ -67,6 +67,7 @@ class image_kernel sealed : boost::noncopyable
 public:\r
        image_kernel(const spl::shared_ptr<class context>& ogl);\r
        void draw(draw_params&& params);\r
+       bool has_blend_modes() const;\r
 private:\r
        struct impl;\r
        spl::shared_ptr<impl> impl_;\r
index f64750e011b1f2c253481eed9f2750c003f95dee..b72344b7789b262ee96efbdc4d53fe3afe7ee182 100644 (file)
@@ -71,94 +71,89 @@ typedef std::pair<core::blend_mode, std::vector<item>> layer;
 class image_renderer\r
 {\r
        spl::shared_ptr<context>        ogl_;\r
-       image_kernel                            kernel_;        \r
+       image_kernel                            kernel_;\r
+       bool                                            warm_;\r
 public:\r
        image_renderer(const spl::shared_ptr<context>& ogl)\r
                : ogl_(ogl)\r
                , kernel_(ogl_)\r
+               , warm_(false)\r
        {\r
        }\r
        \r
        boost::unique_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)\r
        {       \r
-               if(layers.empty())\r
+               // Remove empty layers.\r
+               boost::range::remove_erase_if(layers, [](const layer& layer)\r
                {\r
-                       // Bypass GPU since no work needs to be done.\r
-                       auto buffer = ogl_->create_host_buffer(format_desc.size, host_buffer::usage::write_only);\r
-                       A_memset(buffer->data(), 0, buffer->size());\r
+                       return layer.second.empty();\r
+               });\r
 \r
-                       return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>\r
-                       {\r
-                               auto ptr = reinterpret_cast<const uint8_t*>(buffer->data());\r
-                               return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());\r
-                       });\r
-               }\r
-               else if(layers.size() == 1 && \r
-                               layers.at(0).first == core::blend_mode::normal &&\r
-                               layers.at(0).second.at(0).pix_desc.format == core::pixel_format::bgra)\r
-               { \r
-                       // Bypass GPU since no work needs to be done.\r
-                       auto buffer = layers.at(0).second.at(0).buffers.at(0);\r
-                       return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>\r
-                       {\r
-                               auto ptr = reinterpret_cast<const uint8_t*>(buffer->data());\r
-                               return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());\r
-                       });\r
-               }\r
-               else\r
+               // Start host->device transfers.\r
+               std::map<host_buffer*, boost::shared_future<spl::shared_ptr<device_buffer>>> buffer_map;\r
+               BOOST_FOREACH(auto& layer, layers)\r
                {\r
-                       // Start host->device transfers.\r
-                       BOOST_FOREACH(auto& layer, layers)\r
-                       {\r
-                               BOOST_FOREACH(auto& item, layer.second)\r
-                               {\r
-                                       for(size_t n = 0; n < item.pix_desc.planes.size(); ++n)         \r
-                                               item.textures.push_back(ogl_->copy_async(item.buffers.at(n), item.pix_desc.planes[n].width, item.pix_desc.planes[n].height, item.pix_desc.planes[n].channels));\r
-                               }\r
-                       }               \r
-\r
-                       // Draw\r
-                       boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>\r
+                       BOOST_FOREACH(auto& item, layer.second)\r
                        {\r
-                               auto draw_buffer = create_mixer_buffer(4, format_desc);\r
-\r
-                               if(format_desc.field_mode != core::field_mode::progressive)\r
+                               for(size_t n = 0; n < item.pix_desc.planes.size(); ++n) \r
                                {\r
-                                       auto upper = layers;\r
-                                       auto lower = std::move(layers);\r
-\r
-                                       BOOST_FOREACH(auto& layer, upper)\r
+                                       auto host_buffer = item.buffers.at(n);\r
+                                       auto it                  = buffer_map.find(host_buffer.get());\r
+                                       if(it == buffer_map.end())\r
                                        {\r
-                                               BOOST_FOREACH(auto& item, layer.second)\r
-                                                       item.transform.field_mode = static_cast<core::field_mode>(item.transform.field_mode & core::field_mode::upper);\r
+                                               auto plane                                      = item.pix_desc.planes[n];\r
+                                               auto future_device_buffer       = ogl_->copy_async(host_buffer, plane.width, plane.height, plane.channels);\r
+                                               it = buffer_map.insert(std::make_pair(host_buffer.get(), std::move(future_device_buffer))).first;\r
                                        }\r
+                                       item.textures.push_back(it->second);\r
+                               }       \r
+                               item.buffers.clear();\r
+                       }\r
+               }               \r
+\r
+               // Draw\r
+               boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>\r
+               {\r
+                       auto draw_buffer = create_mixer_buffer(4, format_desc);\r
 \r
-                                       BOOST_FOREACH(auto& layer, lower)\r
-                                       {\r
-                                               BOOST_FOREACH(auto& item, layer.second)\r
-                                                       item.transform.field_mode = static_cast<core::field_mode>(item.transform.field_mode & core::field_mode::lower);\r
-                                       }\r
+                       if(format_desc.field_mode != core::field_mode::progressive)\r
+                       {\r
+                               auto upper = layers;\r
+                               auto lower = std::move(layers);\r
 \r
-                                       draw(std::move(upper), draw_buffer, format_desc);\r
-                                       draw(std::move(lower), draw_buffer, format_desc);\r
+                               BOOST_FOREACH(auto& layer, upper)\r
+                               {\r
+                                       BOOST_FOREACH(auto& item, layer.second)\r
+                                               item.transform.field_mode = static_cast<core::field_mode>(item.transform.field_mode & core::field_mode::upper);\r
                                }\r
-                               else\r
+\r
+                               BOOST_FOREACH(auto& layer, lower)\r
                                {\r
-                                       draw(std::move(layers), draw_buffer, format_desc);\r
+                                       BOOST_FOREACH(auto& item, layer.second)\r
+                                               item.transform.field_mode = static_cast<core::field_mode>(item.transform.field_mode & core::field_mode::lower);\r
                                }\r
-                       \r
-                               auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only); \r
-                               draw_buffer->copy_to(result);                                                   \r
-                               return result;\r
-                       });\r
 \r
-                       // Defer memory mapping.\r
-                       return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>\r
+                               draw(std::move(upper), draw_buffer, format_desc);\r
+                               draw(std::move(lower), draw_buffer, format_desc);\r
+                       }\r
+                       else\r
                        {\r
-                               auto ptr = reinterpret_cast<const uint8_t*>(buffer.get()->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.\r
-                               return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());\r
-                       });\r
-               }\r
+                               draw(std::move(layers), draw_buffer, format_desc);\r
+                       }\r
+                       \r
+                       auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only); \r
+                       draw_buffer->copy_to(result);                                                   \r
+                       return result;\r
+               });\r
+\r
+               warm_ = true;\r
+\r
+               // Defer memory mapping.\r
+               return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>\r
+               {\r
+                       auto ptr = reinterpret_cast<const uint8_t*>(buffer.get()->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.\r
+                       return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());\r
+               });\r
        }\r
 \r
 private:\r
@@ -282,7 +277,7 @@ private:
                \r
 struct image_mixer::impl : boost::noncopyable\r
 {      \r
-       spl::shared_ptr<context>                ogl_;\r
+       spl::shared_ptr<context>                        ogl_;\r
        image_renderer                                          renderer_;\r
        std::vector<core::frame_transform>      transform_stack_;\r
        std::vector<layer>                                      layers_; // layer/stream/items\r
@@ -316,6 +311,9 @@ public:
                if(frame->get_buffers().empty())\r
                        return;\r
 \r
+               if(transform_stack_.back().field_mode == core::field_mode::empty)\r
+                       return;\r
+\r
                item item;\r
                item.pix_desc   = frame->get_pixel_format_desc();\r
                item.buffers    = frame->get_buffers();                         \r
index dc38edf1a628ebf73da4d8aa21905089b84234f0..bdd941fcae51c950d6bcdb118ad3ae24bd4a4794 100644 (file)
@@ -161,8 +161,6 @@ spl::shared_ptr<host_buffer> context::create_host_buffer(int size, host_buffer::
        if(!pool->items.try_pop(buffer))        \r
                buffer = executor_.invoke([=]{return allocate_host_buffer(size, usage);}, task_priority::high_priority);        \r
        \r
-       //++pool->usage_count;\r
-\r
        auto self = shared_from_this();\r
        bool is_write_only      = (usage == host_buffer::usage::write_only);\r
        return spl::shared_ptr<host_buffer>(buffer.get(), [=](host_buffer*) mutable\r
@@ -264,6 +262,11 @@ boost::unique_future<spl::shared_ptr<device_buffer>> context::copy_async(spl::sh
                result->copy_from(source);\r
                return result;\r
        }, task_priority::high_priority);\r
+}
+
+void context::yield()\r
+{\r
+       executor_.yield(task_priority::high_priority);\r
 }\r
 \r
 }}}\r
index 7faca54c32d01dadc59f3740bca0c0409d89d912..cc0579cf876ed35dd89bc905ff7d5d642efe61ad 100644 (file)
@@ -77,6 +77,8 @@ public:
        void attach(device_buffer& texture);\r
        void clear(device_buffer& texture);             \r
        void use(shader& shader);\r
+
+       void yield();
        \r
        spl::shared_ptr<device_buffer>                                                  create_device_buffer(int width, int height, int stride);\r
        spl::shared_ptr<host_buffer>                                                    create_host_buffer(int size, host_buffer::usage usage);\r
index 14d9c1d1f2982f619de1ee0a25b0b2137e638c62..6169dc831059326158f17a8a922df7826a1a461d 100644 (file)
@@ -198,6 +198,24 @@ public:
                if(func)\r
                        func();\r
        }\r
+\r
+       void yield(task_priority priority) // NOT noexcept: throws invalid_operation when called from a thread other than thread_; for high_priority it runs queued high-priority tasks until try_pop fails, otherwise it calls yield().\r
+       {\r
+               if(boost::this_thread::get_id() != thread_.get_id())\r
+                       BOOST_THROW_EXCEPTION(invalid_operation() << msg_info("Executor can only yield inside of thread context."));\r
+\r
+               if(priority == task_priority::high_priority)\r
+               {\r
+                       std::function<void()> func2;\r
+                       while(execution_queue_[task_priority::high_priority].try_pop(func2))\r
+                       {\r
+                               if(func2)\r
+                                       func2();\r
+                       }       \r
+               }\r
+               else\r
+                       yield();\r
+       }\r
                \r
        function_queue::size_type size() const /*noexcept*/\r
        {\r
index b968bc8dfb666eb2fd05eee8c670d5fe4f6ca2dc..077e2134856069abdfb6051e1abef625d9c4685e 100644 (file)
@@ -314,26 +314,26 @@ public:
 \r
 struct flash_producer : public core::frame_producer\r
 {      \r
-       const std::wstring                                                                                      filename_;      \r
+       const std::wstring                                                                                                      filename_;      \r
        const spl::shared_ptr<core::frame_factory>                                                      frame_factory_;\r
-       const int                                                                                                       width_;\r
-       const int                                                                                                       height_;\r
-       const int                                                                                                       buffer_size_;\r
+       const int                                                                                                                       width_;\r
+       const int                                                                                                                       height_;\r
+       const int                                                                                                                       buffer_size_;\r
 \r
-       tbb::atomic<int>                                                                                        fps_;\r
-       tbb::atomic<bool>                                                                                       sync_;\r
+       tbb::atomic<int>                                                                                                        fps_;\r
+       tbb::atomic<bool>                                                                                                       sync_;\r
 \r
-       spl::shared_ptr<diagnostics::graph>                                                             graph_;\r
+       spl::shared_ptr<diagnostics::graph>                                                                     graph_;\r
 \r
        std::queue<spl::shared_ptr<core::draw_frame>>                                           frame_buffer_;\r
        tbb::concurrent_bounded_queue<spl::shared_ptr<core::draw_frame>>        output_buffer_;\r
        \r
-       mutable tbb::spin_mutex                                                                         last_frame_mutex_;\r
+       mutable tbb::spin_mutex                                                                                         last_frame_mutex_;\r
        spl::shared_ptr<core::draw_frame>                                                                       last_frame_;\r
                \r
-       std::unique_ptr<flash_renderer>                                                         renderer_;\r
+       std::unique_ptr<flash_renderer>                                                                         renderer_;\r
 \r
-       executor                                                                                                        executor_;      \r
+       executor                                                                                                                                executor_;      \r
 public:\r
        flash_producer(const spl::shared_ptr<core::frame_factory>& frame_factory, const std::wstring& filename, int width, int height) \r
                : filename_(filename)           \r
@@ -341,7 +341,7 @@ public:
                , last_frame_(core::draw_frame::empty())\r
                , width_(width > 0 ? width : frame_factory->get_video_format_desc().width)\r
                , height_(height > 0 ? height : frame_factory->get_video_format_desc().height)\r
-               , buffer_size_(env::properties().get(L"configuration.flash.buffer-depth", frame_factory_->get_video_format_desc().fps > 30.0 ? 3 : 2))\r
+               , buffer_size_(env::properties().get(L"configuration.flash.buffer-depth", frame_factory_->get_video_format_desc().fps > 30.0 ? 4 : 2))\r
                , executor_(L"flash_producer")\r
        {       \r
                sync_ = true;\r
index 48a90436073731fcd1b68c1ef691e53e419474c5..2ba4f49edebd85a8c444523d02485782f113dc9b 100644 (file)
@@ -236,16 +236,16 @@ public:
                glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, format_desc_.size, 0, GL_STREAM_DRAW_ARB);\r
                glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);\r
                \r
-               if(config_.vsync)\r
+               auto wglSwapIntervalEXT = reinterpret_cast<void(APIENTRY*)(int)>(wglGetProcAddress("wglSwapIntervalEXT"));\r
+               if(wglSwapIntervalEXT)\r
                {\r
-                       auto wglSwapIntervalEXT = reinterpret_cast<void(APIENTRY*)(int)>(wglGetProcAddress("wglSwapIntervalEXT"));\r
-                       if(wglSwapIntervalEXT)\r
+                       if(config_.vsync)\r
                        {\r
                                wglSwapIntervalEXT(1);\r
-                               CASPAR_LOG(info) << print() << " Successfully enabled vsync.";\r
+                               CASPAR_LOG(info) << print() << " Enabled vsync.";\r
                        }\r
                        else\r
-                               CASPAR_LOG(info) << print() << " Failed to enable vsync.";\r
+                               wglSwapIntervalEXT(0);\r
                }\r
 \r
                CASPAR_LOG(info) << print() << " Successfully Initialized.";\r
@@ -372,7 +372,7 @@ public:
                                aligned_memshfl(reinterpret_cast<char*>(ptr), av_frame->data[0], frame->image_data().size(), 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
                        else\r
                                A_memcpy(reinterpret_cast<char*>(ptr), av_frame->data[0], frame->image_data().size());\r
-\r
+                       \r
                        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); // release the mapped buffer\r
                }\r
 \r
@@ -517,7 +517,7 @@ public:
        \r
        virtual int buffer_depth() const override\r
        {\r
-               return 1;\r
+               return 2;\r
        }\r
 \r
        virtual int index() const override\r