]> git.sesse.net Git - casparcg/blobdiff - modules/ogl/consumer/ogl_consumer.cpp
ogl: Fixed performance issues.
[casparcg] / modules / ogl / consumer / ogl_consumer.cpp
index b4f950f7cd95038083b360cc37a2ada0d9a48181..abc47a1daa161e7cb819b394d8907e258193e540 100644 (file)
@@ -33,6 +33,8 @@
 #include <common/utility/timer.h>\r
 #include <common/utility/string.h>\r
 #include <common/concurrency/future_util.h>\r
+#include <common/concurrency/executor.h>\r
+#include <common/exception/win32_exception.h>\r
 \r
 #include <ffmpeg/producer/filter/filter.h>\r
 \r
@@ -99,9 +101,10 @@ struct configuration
        bool                    key_only;\r
        aspect_ratio    aspect; \r
        bool                    vsync;\r
+       bool                    borderless;\r
 \r
        configuration()\r
-               : name(L"ogl")\r
+               : name(L"Screen consumer")\r
                , screen_index(0)\r
                , stretch(fill)\r
                , windowed(true)\r
@@ -109,6 +112,7 @@ struct configuration
                , key_only(false)\r
                , aspect(aspect_invalid)\r
                , vsync(false)\r
+               , borderless(false)\r
        {\r
        }\r
 };\r
@@ -132,6 +136,8 @@ struct ogl_consumer : boost::noncopyable
        size_t                                  square_height_;                         \r
        \r
        sf::Window                              window_;\r
+\r
+       std::int64_t                    pts_;\r
        \r
        safe_ptr<diagnostics::graph>    graph_;\r
        boost::timer                                    perf_timer_;\r
@@ -157,7 +163,27 @@ public:
                , screen_height_(format_desc.height)\r
                , square_width_(format_desc.square_width)\r
                , square_height_(format_desc.square_height)\r
-               , filter_(format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? L"" : L"YADIF=1:-1", boost::assign::list_of(PIX_FMT_BGRA))\r
+               , pts_(0)\r
+               , filter_([&]() -> ffmpeg::filter\r
+               {                       \r
+                       const auto sample_aspect_ratio = \r
+                               boost::rational<int>(\r
+                                       format_desc.square_width, \r
+                                       format_desc.square_height) /\r
+                               boost::rational<int>(\r
+                                       format_desc.width, \r
+                                       format_desc.height);\r
+\r
+                       return ffmpeg::filter(\r
+                               format_desc.width,\r
+                               format_desc.height,\r
+                               boost::rational<int>(format_desc.duration, format_desc.time_scale),\r
+                               boost::rational<int>(format_desc.time_scale, format_desc.duration),\r
+                               sample_aspect_ratio,\r
+                               AV_PIX_FMT_BGRA,\r
+                               boost::assign::list_of(AV_PIX_FMT_BGRA),\r
+                               format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? "" : "format=pix_fmts=gbrp,YADIF=1:-1");\r
+               }())\r
        {               \r
                if(format_desc_.format == core::video_format::ntsc && config_.aspect == configuration::aspect_4_3)\r
                {\r
@@ -214,7 +240,7 @@ public:
                if(!GLEW_VERSION_2_1)\r
                        BOOST_THROW_EXCEPTION(not_supported() << msg_info("Missing OpenGL 2.1 support."));\r
 \r
-               window_.Create(sf::VideoMode(screen_width_, screen_height_, 32), narrow(L"Screen consumer " + channel_and_format()), config_.windowed ? sf::Style::Resize | sf::Style::Close : sf::Style::Fullscreen);\r
+               window_.Create(sf::VideoMode(screen_width_, screen_height_, 32), narrow(print()), config_.borderless ? sf::Style::None : (config_.windowed ? sf::Style::Resize | sf::Style::Close : sf::Style::Fullscreen));\r
                window_.ShowMouseCursor(false);\r
                window_.SetPosition(screen_x_, screen_y_);\r
                window_.SetSize(screen_width_, screen_height_);\r
@@ -273,6 +299,9 @@ public:
 \r
        void run()\r
        {\r
+               win32_exception::ensure_handler_installed_for_thread(\r
+                               "ogl-consumer-thread");\r
+\r
                try\r
                {\r
                        init();\r
@@ -294,9 +323,43 @@ public:
                                        safe_ptr<core::read_frame> frame;\r
 \r
                                        frame_buffer_.pop(frame);\r
-\r
-                                       render_and_draw_frame(frame);\r
                                        \r
+                                       if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
+                                               continue;\r
+                                       // Wrap the frame's pixels in an AVFrame (pointer only, no copy) and push it to the filter.\r
+                                       {\r
+                                               auto av_frame = safe_ptr<AVFrame>(av_frame_alloc(), [frame](AVFrame* ptr)\r
+                                               {\r
+                                                       av_frame_free(&ptr);\r
+                                               });\r
+                                               // The deleter's captured copy of `frame` keeps the read_frame that data[0] points into alive as long as the AVFrame.\r
+                                               av_frame->linesize[0]           = format_desc_.width*4;                 \r
+                                               av_frame->format                        = AV_PIX_FMT_BGRA;\r
+                                               av_frame->width                         = format_desc_.width;\r
+                                               av_frame->height                        = format_desc_.height;\r
+                                               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
+                                               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
+                                               av_frame->data[0]                       = const_cast<uint8_t*>(frame->image_data().begin());\r
+                                               av_frame->pts                           = pts_++;\r
+                                               filter_.push(av_frame);\r
+                                       }\r
+\r
+                                       while(true)\r
+                                       {\r
+                                               perf_timer_.restart();\r
+                                               auto av_frame = filter_.poll();\r
+\r
+                                               if (!av_frame)\r
+                                                       break;\r
+                                               \r
+                                               render(make_safe_ptr(av_frame), frame->image_data().size());\r
+                                               graph_->set_value("frame-time", perf_timer_.elapsed() * format_desc_.fps * 0.5);\r
+\r
+                                               wait_for_vblank_and_display(); // each polled frame: a progressive frame, or presumably one deinterlaced field\r
+                                       }\r
+                                               \r
+                                       current_presentation_age_ = frame->get_age_millis();\r
+                                                                               \r
                                        graph_->set_value("tick-time", tick_timer_.elapsed()*format_desc_.fps*0.5);     \r
                                        tick_timer_.restart();\r
                                }\r
@@ -332,81 +395,9 @@ public:
                // Make sure that the next tick measures the duration from this point in time.\r
                wait_timer_.tick(0.0);\r
        }\r
-       \r
-       safe_ptr<AVFrame> get_av_frame()\r
-       {               \r
-               safe_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);     \r
-               avcodec_get_frame_defaults(av_frame.get());\r
-                                               \r
-               av_frame->linesize[0]           = format_desc_.width*4;                 \r
-               av_frame->format                        = PIX_FMT_BGRA;\r
-               av_frame->width                         = format_desc_.width;\r
-               av_frame->height                        = format_desc_.height;\r
-               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
-               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
-\r
-               return av_frame;\r
-       }\r
-\r
-       void render_and_draw_frame(const safe_ptr<core::read_frame>& frame)\r
-       {\r
-               if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
-                       return;\r
-                                       \r
-               perf_timer_.restart();\r
-               auto av_frame = get_av_frame();\r
-               av_frame->data[0] = const_cast<uint8_t*>(frame->image_data().begin());\r
-\r
-               filter_.push(av_frame);\r
-               auto frames = filter_.poll_all();\r
-\r
-               if (frames.empty())\r
-                       return;\r
-\r
-               if (frames.size() == 1)\r
-               {\r
-                       render(frames[0], frame->image_data().size());\r
-                       graph_->set_value("frame-time", perf_timer_.elapsed() * format_desc_.fps * 0.5);\r
-\r
-                       wait_for_vblank_and_display(); // progressive frame\r
-               }\r
-               else if (frames.size() == 2)\r
-               {\r
-                       render(frames[0], frame->image_data().size());\r
-                       double perf_elapsed = perf_timer_.elapsed();\r
-\r
-                       wait_for_vblank_and_display(); // field1\r
-\r
-                       perf_timer_.restart();\r
-                       render(frames[1], frame->image_data().size());\r
-                       perf_elapsed += perf_timer_.elapsed();\r
-                       graph_->set_value("frame-time", perf_elapsed * format_desc_.fps * 0.5);\r
-\r
-                       wait_for_vblank_and_display(); // field2\r
-               }\r
-\r
-               current_presentation_age_ = frame->get_age_millis();\r
-       }\r
 \r
        void render(safe_ptr<AVFrame> av_frame, int image_data_size)\r
        {\r
-               if(av_frame->linesize[0] != static_cast<int>(format_desc_.width*4))\r
-               {\r
-                       const uint8_t *src_data[4] = {0};\r
-                       memcpy(const_cast<uint8_t**>(&src_data[0]), av_frame->data, 4);\r
-                       const int src_linesizes[4] = {0};\r
-                       memcpy(const_cast<int*>(&src_linesizes[0]), av_frame->linesize, 4);\r
-\r
-                       auto av_frame2 = get_av_frame();\r
-                       av_image_alloc(av_frame2->data, av_frame2->linesize, av_frame2->width, av_frame2->height, PIX_FMT_BGRA, 16);\r
-                       av_frame = safe_ptr<AVFrame>(av_frame2.get(), [=](AVFrame*)\r
-                       {\r
-                               av_freep(&av_frame2->data[0]);\r
-                       });\r
-\r
-                       av_image_copy(av_frame2->data, av_frame2->linesize, src_data, src_linesizes, PIX_FMT_BGRA, av_frame2->width, av_frame2->height);\r
-               }\r
-\r
                glBindTexture(GL_TEXTURE_2D, texture_);\r
 \r
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[0]);\r
@@ -415,13 +406,24 @@ public:
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[1]);\r
                glBufferData(GL_PIXEL_UNPACK_BUFFER, format_desc_.size, 0, GL_STREAM_DRAW);\r
 \r
+\r
                auto ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);\r
                if(ptr)\r
                {\r
                        if(config_.key_only)\r
-                               fast_memshfl(reinterpret_cast<char*>(ptr), av_frame->data[0], image_data_size, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                       {                               \r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memshfl(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                               });\r
+                       }\r
                        else\r
-                               fast_memcpy(reinterpret_cast<char*>(ptr), av_frame->data[0], image_data_size);\r
+                       {\r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memcpy(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4);\r
+                               });\r
+                       }\r
 \r
                        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); // release the mapped buffer\r
                }\r
@@ -456,7 +458,7 @@ public:
                \r
        std::wstring print() const\r
        {       \r
-               return config_.name + channel_and_format();\r
+               return config_.name + L" " + channel_and_format();\r
        }\r
        \r
        void calculate_aspect()\r
@@ -602,6 +604,7 @@ safe_ptr<core::frame_consumer> create_consumer(const core::parameters& params)
        config.windowed = !params.has(L"FULLSCREEN");\r
        config.key_only = params.has(L"KEY_ONLY");\r
        config.name = params.get(L"NAME", config.name);\r
+       config.borderless = params.has(L"BORDERLESS");\r
 \r
        return make_safe<ogl_consumer_proxy>(config);\r
 }\r
@@ -615,6 +618,7 @@ safe_ptr<core::frame_consumer> create_consumer(const boost::property_tree::wptre
        config.key_only                 = ptree.get(L"key-only", config.key_only);\r
        config.auto_deinterlace = ptree.get(L"auto-deinterlace", config.auto_deinterlace);\r
        config.vsync                    = ptree.get(L"vsync", config.vsync);\r
+       config.borderless       = ptree.get(L"borderless", config.borderless);\r
 \r
        auto stretch_str = ptree.get(L"stretch", L"default");\r
        if(stretch_str == L"uniform")\r