git.sesse.net Git - casparcg/commitdiff
ogl: Fixed performance issues.
author: Robert Nagy <ronag@live.com>
Thu, 19 Dec 2013 18:38:24 +0000 (19:38 +0100)
committer: Robert Nagy <ronag@live.com>
Thu, 19 Dec 2013 18:38:24 +0000 (19:38 +0100)
modules/ffmpeg/producer/filter/filter.cpp
modules/ogl/consumer/ogl_consumer.cpp

index de6486d9b1dff728c9f1c91c376aadd5330b4b50..bd09d1cc16dd12000981596b45f0bddfc7924a80 100644 (file)
@@ -33,7 +33,6 @@
 #include <boost/assign.hpp>\r
 #include <boost/algorithm/string.hpp>\r
 #include <boost/foreach.hpp>\r
-#include <boost/thread.hpp>\r
 #include <boost/format.hpp>\r
 #include <boost/rational.hpp>\r
 \r
@@ -104,7 +103,7 @@ struct filter::implementation
                                avfilter_graph_free(&p);\r
                        });\r
                \r
-               video_graph_->nb_threads  = boost::thread::hardware_concurrency();\r
+               video_graph_->nb_threads  = 0;\r
                video_graph_->thread_type = AVFILTER_THREAD_SLICE;\r
 \r
                const auto vsrc_options = (boost::format("video_size=%1%x%2%:pix_fmt=%3%:time_base=%4%/%5%:pixel_aspect=%6%/%7%:frame_rate=%8%/%9%")\r
@@ -149,15 +148,15 @@ struct filter::implementation
                        filtergraph_,\r
                        *filt_vsrc,\r
                        *filt_vsink);\r
-\r
+               \r
                video_graph_in_  = filt_vsrc;\r
                video_graph_out_ = filt_vsink;\r
                \r
-               //CASPAR_LOG(info)\r
-               //      <<      widen(std::string("\n") \r
-               //              + avfilter_graph_dump(\r
-               //                              video_graph_.get(), \r
-               //                              nullptr));\r
+               CASPAR_LOG(info)\r
+                       <<      widen(std::string("\n") \r
+                               + avfilter_graph_dump(\r
+                                               video_graph_.get(), \r
+                                               nullptr));\r
        }\r
        \r
        void configure_filtergraph(\r
index edb5b9f41a5bbfe88719bda0c6d588f51d7fd47d..abc47a1daa161e7cb819b394d8907e258193e540 100644 (file)
@@ -136,6 +136,8 @@ struct ogl_consumer : boost::noncopyable
        size_t                                  square_height_;                         \r
        \r
        sf::Window                              window_;\r
+\r
+       std::int64_t                    pts_;\r
        \r
        safe_ptr<diagnostics::graph>    graph_;\r
        boost::timer                                    perf_timer_;\r
@@ -161,6 +163,7 @@ public:
                , screen_height_(format_desc.height)\r
                , square_width_(format_desc.square_width)\r
                , square_height_(format_desc.square_height)\r
+               , pts_(0)\r
                , filter_([&]() -> ffmpeg::filter\r
                {                       \r
                        const auto sample_aspect_ratio = \r
@@ -179,7 +182,7 @@ public:
                                sample_aspect_ratio,\r
                                AV_PIX_FMT_BGRA,\r
                                boost::assign::list_of(AV_PIX_FMT_BGRA),\r
-                               format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? "" : "YADIF=1:-1");\r
+                               format_desc.field_mode == core::field_mode::progressive || !config.auto_deinterlace ? "" : "format=pix_fmts=gbrp,YADIF=1:-1");\r
                }())\r
        {               \r
                if(format_desc_.format == core::video_format::ntsc && config_.aspect == configuration::aspect_4_3)\r
@@ -320,9 +323,43 @@ public:
                                        safe_ptr<core::read_frame> frame;\r
 \r
                                        frame_buffer_.pop(frame);\r
-\r
-                                       render_and_draw_frame(frame);\r
                                        \r
+                                       if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
+                                               continue;\r
+                                       \r
+                                       {\r
+                                               auto av_frame = safe_ptr<AVFrame>(av_frame_alloc(), [frame](AVFrame* frame)\r
+                                               {\r
+                                                       av_frame_free(&frame);\r
+                                               });\r
+                                               \r
+                                               av_frame->linesize[0]           = format_desc_.width*4;                 \r
+                                               av_frame->format                        = PIX_FMT_BGRA;\r
+                                               av_frame->width                         = format_desc_.width;\r
+                                               av_frame->height                        = format_desc_.height;\r
+                                               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
+                                               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
+                                               av_frame->data[0]                       = const_cast<uint8_t*>(frame->image_data().begin());\r
+                                               av_frame->pts                           = pts_++;\r
+                                               filter_.push(av_frame);\r
+                                       }\r
+\r
+                                       while(true)\r
+                                       {\r
+                                               perf_timer_.restart();\r
+                                               auto av_frame = filter_.poll();\r
+\r
+                                               if (!av_frame)\r
+                                                       break;\r
+                                               \r
+                                               render(make_safe_ptr(av_frame), frame->image_data().size());\r
+                                               graph_->set_value("frame-time", perf_timer_.elapsed() * format_desc_.fps * 0.5);\r
+\r
+                                               wait_for_vblank_and_display(); // progressive fram\r
+                                       }\r
+                                               \r
+                                       current_presentation_age_ = frame->get_age_millis();\r
+                                                                               \r
                                        graph_->set_value("tick-time", tick_timer_.elapsed()*format_desc_.fps*0.5);     \r
                                        tick_timer_.restart();\r
                                }\r
@@ -358,81 +395,9 @@ public:
                // Make sure that the next tick measures the duration from this point in time.\r
                wait_timer_.tick(0.0);\r
        }\r
-       \r
-       safe_ptr<AVFrame> get_av_frame()\r
-       {               \r
-               safe_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);     \r
-               avcodec_get_frame_defaults(av_frame.get());\r
-                                               \r
-               av_frame->linesize[0]           = format_desc_.width*4;                 \r
-               av_frame->format                        = PIX_FMT_BGRA;\r
-               av_frame->width                         = format_desc_.width;\r
-               av_frame->height                        = format_desc_.height;\r
-               av_frame->interlaced_frame      = format_desc_.field_mode != core::field_mode::progressive;\r
-               av_frame->top_field_first       = format_desc_.field_mode == core::field_mode::upper ? 1 : 0;\r
-\r
-               return av_frame;\r
-       }\r
-\r
-       void render_and_draw_frame(const safe_ptr<core::read_frame>& frame)\r
-       {\r
-               if(static_cast<size_t>(frame->image_data().size()) != format_desc_.size)\r
-                       return;\r
-                                       \r
-               perf_timer_.restart();\r
-               auto av_frame = get_av_frame();\r
-               av_frame->data[0] = const_cast<uint8_t*>(frame->image_data().begin());\r
-\r
-               filter_.push(av_frame);\r
-               auto frames = filter_.poll_all();\r
-\r
-               if (frames.empty())\r
-                       return;\r
-\r
-               if (frames.size() == 1)\r
-               {\r
-                       render(frames[0], frame->image_data().size());\r
-                       graph_->set_value("frame-time", perf_timer_.elapsed() * format_desc_.fps * 0.5);\r
-\r
-                       wait_for_vblank_and_display(); // progressive frame\r
-               }\r
-               else if (frames.size() == 2)\r
-               {\r
-                       render(frames[0], frame->image_data().size());\r
-                       double perf_elapsed = perf_timer_.elapsed();\r
-\r
-                       wait_for_vblank_and_display(); // field1\r
-\r
-                       perf_timer_.restart();\r
-                       render(frames[1], frame->image_data().size());\r
-                       perf_elapsed += perf_timer_.elapsed();\r
-                       graph_->set_value("frame-time", perf_elapsed * format_desc_.fps * 0.5);\r
-\r
-                       wait_for_vblank_and_display(); // field2\r
-               }\r
-\r
-               current_presentation_age_ = frame->get_age_millis();\r
-       }\r
 \r
        void render(safe_ptr<AVFrame> av_frame, int image_data_size)\r
        {\r
-               if(av_frame->linesize[0] != static_cast<int>(format_desc_.width*4))\r
-               {\r
-                       const uint8_t *src_data[4] = {0};\r
-                       memcpy(const_cast<uint8_t**>(&src_data[0]), av_frame->data, 4);\r
-                       const int src_linesizes[4] = {0};\r
-                       memcpy(const_cast<int*>(&src_linesizes[0]), av_frame->linesize, 4);\r
-\r
-                       auto av_frame2 = get_av_frame();\r
-                       av_image_alloc(av_frame2->data, av_frame2->linesize, av_frame2->width, av_frame2->height, PIX_FMT_BGRA, 16);\r
-                       av_frame = safe_ptr<AVFrame>(av_frame2.get(), [=](AVFrame*)\r
-                       {\r
-                               av_freep(&av_frame2->data[0]);\r
-                       });\r
-\r
-                       av_image_copy(av_frame2->data, av_frame2->linesize, src_data, src_linesizes, PIX_FMT_BGRA, av_frame2->width, av_frame2->height);\r
-               }\r
-\r
                glBindTexture(GL_TEXTURE_2D, texture_);\r
 \r
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[0]);\r
@@ -441,13 +406,24 @@ public:
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos_[1]);\r
                glBufferData(GL_PIXEL_UNPACK_BUFFER, format_desc_.size, 0, GL_STREAM_DRAW);\r
 \r
+\r
                auto ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);\r
                if(ptr)\r
                {\r
                        if(config_.key_only)\r
-                               fast_memshfl(reinterpret_cast<char*>(ptr), av_frame->data[0], image_data_size, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                       {                               \r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memshfl(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);\r
+                               });\r
+                       }\r
                        else\r
-                               fast_memcpy(reinterpret_cast<char*>(ptr), av_frame->data[0], image_data_size);\r
+                       {\r
+                               tbb::parallel_for(0, av_frame->height, 1, [&](int y)\r
+                               {\r
+                                       fast_memcpy(reinterpret_cast<char*>(ptr) + y * format_desc_.width * 4, av_frame->data[0] + y * av_frame->linesize[0], format_desc_.width * 4);\r
+                               });\r
+                       }\r
 \r
                        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); // release the mapped buffer\r
                }\r