]> git.sesse.net Git - casparcg/blobdiff - modules/ogl/consumer/ogl_consumer.cpp
ogl: Fixed performance issues.
[casparcg] / modules / ogl / consumer / ogl_consumer.cpp
index 7d0acb226235b3a9a8abd3daf34ac7585558ffa5..abc47a1daa161e7cb819b394d8907e258193e540 100644 (file)
@@ -1,5 +1,5 @@
 /*\r
-* Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>\r
+* Copyright 2013 Sveriges Television AB http://casparcg.com/\r
 *\r
 * This file is part of CasparCG (www.casparcg.com).\r
 *\r
 #include <common/memory/memshfl.h>\r
 #include <common/utility/timer.h>\r
 #include <common/utility/string.h>\r
+#include <common/concurrency/future_util.h>\r
+#include <common/concurrency/executor.h>\r
+#include <common/exception/win32_exception.h>\r
 \r
 #include <ffmpeg/producer/filter/filter.h>\r
 \r
+#include <core/parameters/parameters.h>\r
 #include <core/video_format.h>\r
 #include <core/mixer/read_frame.h>\r
 #include <core/consumer/frame_consumer.h>\r
@@ -68,6 +72,8 @@ extern "C"
 #pragma warning (pop)\r
 #endif\r
 \r
+typedef int (*PFNWGLEXTGETSWAPINTERVALPROC) (void);\r
\r
 namespace caspar { namespace ogl {\r
                \r
 enum stretch\r
@@ -94,15 +100,19 @@ struct configuration
        bool                    auto_deinterlace;\r
        bool                    key_only;\r
        aspect_ratio    aspect; \r
+       bool                    vsync;\r
+       bool                    borderless;\r
 \r
        configuration()\r
-               : name(L"ogl")\r
+               : name(L"Screen consumer")\r
                , screen_index(0)\r
                , stretch(fill)\r
                , windowed(true)\r
                , auto_deinterlace(true)\r
                , key_only(false)\r
                , aspect(aspect_invalid)\r
+               , vsync(false)\r
+               , borderless(false)\r
        {\r
        }\r
 };\r
@@ -126,15 +136,20 @@ struct ogl_consumer : boost::noncopyable
        size_t                                  square_height_;                         \r
        \r
        sf::Window                              window_;\r
+\r
+       std::int64_t                    pts_;\r
        \r
        safe_ptr<diagnostics::graph>    graph_;\r
        boost::timer                                    perf_timer_;\r
        boost::timer                                    tick_timer_;\r
 \r
+       caspar::high_prec_timer wait_timer_;\r
+\r
        tbb::concurrent_bounded_queue<safe_ptr<core::read_frame>>       frame_buffer_;\r
 \r
        boost::thread                   thread_;\r
        tbb::atomic<bool>               is_running_;\r
+       tbb::atomic<int64_t>    current_presentation_age_;\r
        \r
        ffmpeg::filter                  filter_;\r
 public:\r
@@ -148,7 +163,27 @@ public:
                , screen_height_(format_desc.height)\r
                , square_width_(format_desc.square_width)\r
                , square_height_(format_desc.square_height)\r
-               , filter_(format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? L"" : L"YADIF=0:-1", boost::assign::list_of(PIX_FMT_BGRA))\r
+               , pts_(0)\r
+               , filter_([&]() -> ffmpeg::filter\r
+               {                       \r
+                       const auto sample_aspect_ratio = \r
+                               boost::rational<int>(\r
+                                       format_desc.square_width, \r
+                                       format_desc.square_height) /\r
+                               boost::rational<int>(\r
+                                       format_desc.width, \r
+                                       format_desc.height);\r
+\r
+                       return ffmpeg::filter(\r
+                               format_desc.width,\r
+                               format_desc.height,\r
+                               boost::rational<int>(format_desc.duration, format_desc.time_scale),\r
+                               boost::rational<int>(format_desc.time_scale, format_desc.duration),\r
+                               sample_aspect_ratio,\r
+                               AV_PIX_FMT_BGRA,\r
+                               boost::assign::list_of(AV_PIX_FMT_BGRA),\r
+                               format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? "" : "format=pix_fmts=gbrp,YADIF=1:-1");\r
+               }())\r
        {               \r
                if(format_desc_.format == core::video_format::ntsc && config_.aspect == configuration::aspect_4_3)\r
                {\r
@@ -167,6 +202,7 @@ public:
                graph_->set_color("tick-time", diagnostics::color(0.0f, 0.6f, 0.9f));   \r
                graph_->set_color("frame-time", diagnostics::color(0.1f, 1.0f, 0.1f));\r
                graph_->set_color("dropped-frame", diagnostics::color(0.3f, 0.6f, 0.3f));\r
+\r
                graph_->set_text(print());\r
                diagnostics::register_graph(graph_);\r
                                                                        \r
@@ -186,8 +222,9 @@ public:
                screen_y_               = devmode.dmPosition.y;\r
                screen_width_   = config_.windowed ? square_width_ : devmode.dmPelsWidth;\r
                screen_height_  = config_.windowed ? square_height_ : devmode.dmPelsHeight;\r
-               \r
+\r
                is_running_ = true;\r
+               current_presentation_age_ = 0;\r
                thread_ = boost::thread([this]{run();});\r
        }\r
        \r
@@ -203,7 +240,7 @@ public:
                if(!GLEW_VERSION_2_1)\r
                        BOOST_THROW_EXCEPTION(not_supported() << msg_info("Missing OpenGL 2.1 support."));\r
 \r
-               window_.Create(sf::VideoMode(screen_width_, screen_height_, 32), narrow(print()), config_.windowed ? sf::Style::Resize | sf::Style::Close : sf::Style::Fullscreen);\r
+               window_.Create(sf::VideoMode(screen_width_, screen_height_, 32), narrow(print()), config_.borderless ? sf::Style::None : (config_.windowed ? sf::Style::Resize | sf::Style::Close : sf::Style::Fullscreen));\r
                window_.ShowMouseCursor(false);\r
                window_.SetPosition(screen_x_, screen_y_);\r
                window_.SetSize(screen_width_, screen_height_);\r
@@ -232,6 +269,18 @@ public:
                glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbos_[1]);\r
                glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, format_desc_.size, 0, GL_STREAM_DRAW_ARB);\r
                glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);\r
+                               \r
+               if(config_.vsync)\r
+               {\r
+                       auto wglSwapIntervalEXT = reinterpret_cast<void(APIENTRY*)(int)>(wglGetProcAddress("wglSwapIntervalEXT"));\r
+                       if(wglSwapIntervalEXT)\r
+                       {\r
+                               wglSwapIntervalEXT(1);\r
+                               CASPAR_LOG(info) << print() << " Successfully enabled vsync.";\r
+                       }\r
+                       else\r
+                               CASPAR_LOG(info) << print() << " Failed to enable vsync.";\r
+               }\r
 \r
                CASPAR_LOG(info) << print() << " Successfully Initialized.";\r
        }\r
@@ -250,6 +299,9 @@ public:
 \r
        void run()\r
        {\r
+               win32_exception::ensure_handler_installed_for_thread(\r
+                               "ogl-consumer-thread");\r
+\r
                try\r
                {\r
                        init();\r
@@ -269,14 +321,45 @@ public:
                                        }\r
                        \r
                                        safe_ptr<core::read_frame> frame;\r
+\r
                                        frame_buffer_.pop(frame);\r
                                        \r
-                                       perf_timer_.restart();\r
-                                       render(frame);\r
-                                       graph_->set_value("frame-time", perf_timer_.elapsed()*format_desc_.fps*0.5);    \r
-\r
-                                       window_.Display();\r
+                                       if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
+                                               continue;\r
                                        \r
+                                       {\r
+                                               auto av_frame = safe_ptr<AVFrame>(av_frame_alloc(), [frame](AVFrame* frame)\r
+                                               {\r
+                                                       av_frame_free(&frame);\r
+                                               });\r
+                                               \r
+                                               av_frame->linesize[0]           = format_desc_.width*4;                 \r
+                                               av_frame->format                        = PIX_FMT_BGRA;\r
+                                               av_frame->width                         = format_desc_.width;\r
+                                               av_frame->height                        = format_desc_.height;\r
+                                               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
+                                               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
+                                               av_frame->data[0]                       = const_cast<uint8_t*>(frame->image_data().begin());\r
+                                               av_frame->pts                           = pts_++;\r
+                                               filter_.push(av_frame);\r
+                                       }\r
+\r
+                                       while(true)\r
+                                       {\r
+                                               perf_timer_.restart();\r
+                                               auto av_frame = filter_.poll();\r
+\r
+                                               if (!av_frame)\r
+                                                       break;\r
+                                               \r
+                                               render(make_safe_ptr(av_frame), frame->image_data().size());\r
+                                               graph_->set_value("frame-time", perf_timer_.elapsed() * format_desc_.fps * 0.5);\r
+\r
+                                               wait_for_vblank_and_display(); // progressive frame\r
+                                       }\r
+                                               \r
+                                       current_presentation_age_ = frame->get_age_millis();\r
+                                                                               \r
                                        graph_->set_value("tick-time", tick_timer_.elapsed()*format_desc_.fps*0.5);     \r
                                        tick_timer_.restart();\r
                                }\r
@@ -294,55 +377,27 @@ public:
                        CASPAR_LOG_CURRENT_EXCEPTION();\r
                }\r
        }\r
-       \r
-       safe_ptr<AVFrame> get_av_frame()\r
-       {               \r
-               safe_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);     \r
-               avcodec_get_frame_defaults(av_frame.get());\r
-                                               \r
-               av_frame->linesize[0]           = format_desc_.width*4;                 \r
-               av_frame->format                        = PIX_FMT_BGRA;\r
-               av_frame->width                         = format_desc_.width;\r
-               av_frame->height                        = format_desc_.height;\r
-               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
-               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
-\r
-               return av_frame;\r
-       }\r
 \r
-       void render(const safe_ptr<core::read_frame>& frame)\r
-       {                       \r
-               if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
-                       return;\r
-                                       \r
-               auto av_frame = get_av_frame();\r
-               av_frame->data[0] = const_cast<uint8_t*>(frame->image_data().begin());\r
-\r
-               filter_.push(av_frame);\r
-               auto frames = filter_.poll_all();\r
-\r
-               if(frames.empty())\r
-                       return;\r
+       void try_sleep_almost_until_vblank()\r
+       {\r
+               static const double THRESHOLD = 0.003;\r
+               double threshold = config_.vsync ? THRESHOLD : 0.0;\r
 \r
-               av_frame = frames[0];\r
+               auto frame_time = 1.0 / (format_desc_.fps * format_desc_.field_count);\r
 \r
-               if(av_frame->linesize[0] != static_cast<int>(format_desc_.width*4))\r
-               {\r
-                       const uint8_t *src_data[4] = {0};\r
-                       memcpy(const_cast<uint8_t**>(&src_data[0]), av_frame->data, 4);\r
-                       const int src_linesizes[4] = {0};\r
-                       memcpy(const_cast<int*>(&src_linesizes[0]), av_frame->linesize, 4);\r
-\r
-                       auto av_frame2 = get_av_frame();\r
-                       av_image_alloc(av_frame2->data, av_frame2->linesize, av_frame2->width, av_frame2->height, PIX_FMT_BGRA, 16);\r
-                       av_frame = safe_ptr<AVFrame>(av_frame2.get(), [=](AVFrame*)\r
-                       {\r
-                               av_freep(&av_frame2->data[0]);\r
-                       });\r
+               wait_timer_.tick(frame_time - threshold);\r
+       }\r
 \r
-                       av_image_copy(av_frame2->data, av_frame2->linesize, src_data, src_linesizes, PIX_FMT_BGRA, av_frame2->width, av_frame2->height);\r
-               }\r
+       void wait_for_vblank_and_display()\r
+       {\r
+               try_sleep_almost_until_vblank();\r
+               window_.Display();\r
+               // Make sure that the next tick measures the duration from this point in time.\r
+               wait_timer_.tick(0.0);\r
+       }\r
 \r
+       void render(safe_ptr<AVFrame> av_frame, int image_data_size)\r
+       {\r
                glBindTexture(GL_TEXTURE_2D, texture_);\r
 \r
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[0]);\r
@@ -351,13 +406,24 @@ public:
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[1]);\r
                glBufferData(GL_PIXEL_UNPACK_BUFFER, format_desc_.size, 0, GL_STREAM_DRAW);\r
 \r
+\r
                auto ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);\r
                if(ptr)\r
                {\r
                        if(config_.key_only)\r
-                               fast_memshfl(reinterpret_cast<char*>(ptr), av_frame->data[0], frame->image_data().size(), 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                       {                               \r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memshfl(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                               });\r
+                       }\r
                        else\r
-                               fast_memcpy(reinterpret_cast<char*>(ptr), av_frame->data[0], frame->image_data().size());\r
+                       {\r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memcpy(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4);\r
+                               });\r
+                       }\r
 \r
                        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); // release the mapped buffer\r
                }\r
@@ -377,16 +443,22 @@ public:
                std::rotate(pbos_.begin(), pbos_.begin() + 1, pbos_.end());\r
        }\r
 \r
-       bool send(const safe_ptr<core::read_frame>& frame)\r
+       boost::unique_future<bool> send(const safe_ptr<core::read_frame>& frame)\r
        {\r
-               if(!frame_buffer_.try_push(frame))\r
-                       graph_->set_tag("dropped-frame");\r
-               return is_running_;\r
+               if (!frame_buffer_.try_push(frame))\r
+                       graph_->set_tag("dropped-frame"); \r
+\r
+               return wrap_as_future(is_running_.load());\r
+       }\r
+\r
+       std::wstring channel_and_format() const\r
+       {\r
+               return L"[" + boost::lexical_cast<std::wstring>(channel_index_) + L"|" + format_desc_.name + L"]";\r
        }\r
                \r
        std::wstring print() const\r
        {       \r
-               return config_.name + L"[" + boost::lexical_cast<std::wstring>(channel_index_) + L"|" + format_desc_.name + L"]";\r
+               return config_.name + L" " + channel_and_format();\r
        }\r
        \r
        void calculate_aspect()\r
@@ -475,8 +547,13 @@ public:
                consumer_.reset(new ogl_consumer(config_, format_desc, channel_index));\r
                CASPAR_LOG(info) << print() << L" Successfully Initialized.";   \r
        }\r
-       \r
-       virtual bool send(const safe_ptr<core::read_frame>& frame) override\r
+\r
+       virtual int64_t presentation_frame_age_millis() const override\r
+       {\r
+               return consumer_ ? consumer_->current_presentation_age_ : 0;\r
+       }\r
+\r
+       virtual boost::unique_future<bool> send(const safe_ptr<core::read_frame>& frame) override\r
        {\r
                return consumer_->send(frame);\r
        }\r
@@ -508,27 +585,26 @@ public:
 \r
        virtual int index() const override\r
        {\r
-               return 600;\r
+               return 600 + config_.screen_index;\r
        }\r
 };     \r
 \r
-safe_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>& params)\r
+safe_ptr<core::frame_consumer> create_consumer(const core::parameters& params)\r
 {\r
        if(params.size() < 1 || params[0] != L"SCREEN")\r
                return core::frame_consumer::empty();\r
        \r
        configuration config;\r
                \r
-       auto device_it = std::find(params.begin(), params.end(), L"DEVICE");\r
-       if(device_it != params.end() && ++device_it != params.end())\r
-               config.screen_index = boost::lexical_cast<int>(*device_it);\r
-               \r
-       config.key_only = std::find(params.begin(), params.end(), L"WINDOWED") != params.end();\r
-       config.key_only = std::find(params.begin(), params.end(), L"KEY_ONLY") != params.end();\r
+       if(params.size() > 1)\r
+               config.screen_index =\r
+                               lexical_cast_or_default<int>(params[1], config.screen_index);\r
 \r
-       auto name_it    = std::find(params.begin(), params.end(), L"NAME");\r
-       if(name_it != params.end() && ++name_it != params.end())\r
-               config.name = *name_it;\r
+       config.screen_index = params.get(L"DEVICE", config.screen_index);\r
+       config.windowed = !params.has(L"FULLSCREEN");\r
+       config.key_only = params.has(L"KEY_ONLY");\r
+       config.name = params.get(L"NAME", config.name);\r
+       config.borderless = params.has(L"BORDERLESS");\r
 \r
        return make_safe<ogl_consumer_proxy>(config);\r
 }\r
@@ -541,7 +617,9 @@ safe_ptr<core::frame_consumer> create_consumer(const boost::property_tree::wptre
        config.windowed                 = ptree.get(L"windowed", config.windowed);\r
        config.key_only                 = ptree.get(L"key-only", config.key_only);\r
        config.auto_deinterlace = ptree.get(L"auto-deinterlace", config.auto_deinterlace);\r
-       \r
+       config.vsync                    = ptree.get(L"vsync", config.vsync);\r
+       config.borderless       = ptree.get(L"borderless", config.borderless);\r
+\r
        auto stretch_str = ptree.get(L"stretch", L"default");\r
        if(stretch_str == L"uniform")\r
                config.stretch = stretch::uniform;\r