git.sesse.net Git - casparcg/blobdiff - core/consumer/output.cpp
2.0. Reduced frame latency by 3 frames.
[casparcg] / core / consumer / output.cpp
index 2f779325ecf8a8e3fb05e901e765268864424626..44774d42a3cc3776f22e671a93ffced955576129 100644 (file)
 #include <common/utility/timer.h>\r
 #include <common/memory/memshfl.h>\r
 \r
+#include <tbb/mutex.h>\r
+\r
 namespace caspar { namespace core {\r
+\r
+// read_frame adapter that wraps a "fill" frame and exposes a derived image in its place.\r
+// image_data() returns the contents of a host buffer that is created on the first call by\r
+// running fast_memsfhl over the fill frame's image data; audio_data() forwards to the fill frame.\r
+// NOTE(review): the four shuffle masks presumably replicate each pixel's alpha byte into all\r
+// of its channels (a "key" image) - confirm against the fast_memsfhl implementation.\r
+class key_read_frame_adapter : public core::read_frame\r
+{\r
+       // Device used to allocate the host buffer that holds the derived image.\r
+       ogl_device&                                              ogl_;\r
+       // The wrapped frame that supplies the source image and the audio.\r
+       safe_ptr<read_frame>                     fill_;\r
+       // Derived image buffer; stays null until image_data() is called for the first time.\r
+       std::shared_ptr<host_buffer>     key_;\r
+       // Locked for the whole of image_data(), which serializes the lazy creation of key_.\r
+       tbb::mutex                                           mutex_;\r
+public:\r
+       // Stores the device reference and the fill frame; no image work is done here.\r
+       key_read_frame_adapter(ogl_device& ogl, const safe_ptr<read_frame>& fill)\r
+               : ogl_(ogl)\r
+               , fill_(fill)\r
+       {\r
+       }\r
+\r
+       // Returns the derived image as a byte range over key_'s data.\r
+       // The buffer is built on the first call only; later calls return the same buffer.\r
+       virtual const boost::iterator_range<const uint8_t*> image_data()\r
+       {\r
+               tbb::mutex::scoped_lock lock(mutex_);\r
+               if(!key_)\r
+               {\r
+                       // Allocate a buffer the size of the fill image and write the shuffled fill image into it.\r
+                       key_ = ogl_.create_host_buffer(fill_->image_data().size(), host_buffer::write_only);                            \r
+                       fast_memsfhl(key_->data(), fill_->image_data().begin(), fill_->image_data().size(), 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+               }\r
+\r
+               auto ptr = static_cast<const uint8_t*>(key_->data());\r
+               return boost::iterator_range<const uint8_t*>(ptr, ptr + key_->size());\r
+       }\r
+\r
+       // Audio is not transformed; the fill frame's audio range is returned as-is.\r
+       virtual const boost::iterator_range<const int16_t*> audio_data()\r
+       {\r
+               return fill_->audio_data();\r
+       }       \r
+};\r
        \r
 struct output::implementation\r
 {      \r
-       typedef std::pair<safe_ptr<const read_frame>, safe_ptr<const read_frame>> fill_and_key;\r
+       typedef std::pair<safe_ptr<read_frame>, safe_ptr<read_frame>> fill_and_key;\r
        \r
        video_channel_context& channel_;\r
 \r
@@ -86,7 +120,7 @@ public:
                                timer_.tick(1.0/channel_.get_format_desc().fps);\r
                                                \r
                        auto fill = frame;\r
-                       auto key = get_key_frame(frame);\r
+                       auto key = make_safe<key_read_frame_adapter>(channel_.ogl(), frame);\r
 \r
                        auto it = consumers_.begin();\r
                        while(it != consumers_.end())\r
@@ -128,25 +162,6 @@ private:
                        return p.second->has_synchronization_clock();\r
                });\r
        }\r
-       // Produces the key frame: if any consumer reports key_only(), a new read_frame holding the input image passed through fast_memsfhl plus a copy of its audio; otherwise a default-constructed read_frame.\r
-       safe_ptr<const read_frame> get_key_frame(const safe_ptr<const read_frame>& frame)\r
-       {\r
-               bool has_key_only = std::any_of(consumers_.begin(), consumers_.end(), [](const decltype(*consumers_.begin())& p)\r
-               {\r
-                       return p.second->key_only();\r
-               });\r
-\r
-               if(has_key_only)\r
-               {\r
-                       // The key_only transform is currently done on the CPU; it is unclear whether the extra 400MB/s (1080p50) overhead would be worth it to do it on the GPU.\r
-                       auto key_data = channel_.ogl().create_host_buffer(frame->image_data().size(), host_buffer::write_only);                         \r
-                       fast_memsfhl(key_data->data(), frame->image_data().begin(), frame->image_data().size(), 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
-                       std::vector<int16_t> audio_data(frame->audio_data().begin(), frame->audio_data().end());\r
-                       return make_safe<read_frame>(channel_.ogl(), std::move(key_data), std::move(audio_data));\r
-               }\r
-               \r
-               return make_safe<read_frame>();\r
-       }\r
        \r
        std::wstring print() const\r
        {\r