]> git.sesse.net Git - casparcg/blobdiff - accelerator/cpu/image/image_mixer.cpp
2.1.0: -data_frame: use unique_ptr instead of shared_ptr.
[casparcg] / accelerator / cpu / image / image_mixer.cpp
index 375ebbb50780c30f107e6d84579ed2932dc9b23e..e72d041646e594d22a6e1252910843d98dafe2d9 100644 (file)
@@ -23,7 +23,7 @@
 \r
 #include "image_mixer.h"\r
 \r
-#include "../util/write_frame.h"\r
+#include "../util/data_frame.h"\r
 #include "../util/xmm.h"\r
 \r
 #include <common/assert.h>\r
@@ -31,7 +31,7 @@
 #include <common/concurrency/async.h>\r
 #include <common/memory/memcpy.h>\r
 \r
-#include <core/frame/write_frame.h>\r
+#include <core/frame/data_frame.h>\r
 #include <core/frame/frame_transform.h>\r
 #include <core/frame/pixel_format.h>\r
 #include <core/video_format.h>\r
@@ -75,7 +75,7 @@ struct item
 {\r
        core::pixel_format_desc                                         pix_desc;\r
        std::vector<spl::shared_ptr<host_buffer>>       buffers;\r
-       core::frame_transform                                           transform;\r
+       core::image_transform                                           transform;\r
 \r
        item()\r
                : pix_desc(core::pixel_format::invalid)\r
@@ -114,27 +114,38 @@ inline xmm::s8_x blend(xmm::s8_x d, xmm::s8_x s)
        return s8_x(s) + (d - argb);\r
 }\r
        \r
-template<typename write_tag>\r
-static void kernel(uint8_t* dest, const uint8_t* source, size_t count, const core::frame_transform& transform)\r
+template<typename temporal, typename alignment>\r
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count)\r
 {                      \r
        using namespace xmm;\r
 \r
        for(auto n = 0; n < count; n += 32)    \r
        {\r
-               auto s0 = s8_x::load(dest+n+0);\r
-               auto s1 = s8_x::load(dest+n+16);\r
+               auto s0 = s8_x::load<temporal_tag, alignment>(dest+n+0);\r
+               auto s1 = s8_x::load<temporal_tag, alignment>(dest+n+16);\r
 \r
-               auto d0 = s8_x::load(source+n+0);\r
-               auto d1 = s8_x::load(source+n+16);\r
+               auto d0 = s8_x::load<temporal_tag, alignment>(source+n+0);\r
+               auto d1 = s8_x::load<temporal_tag, alignment>(source+n+16);\r
                \r
                auto argb0 = blend(d0, s0);\r
                auto argb1 = blend(d1, s1);\r
 \r
-               s8_x::write(argb0, dest+n+0 , write_tag());\r
-               s8_x::write(argb1, dest+n+16, write_tag());\r
+               s8_x::store<temporal, alignment>(argb0, dest+n+0 );\r
+               s8_x::store<temporal, alignment>(argb1, dest+n+16);\r
        } \r
 }\r
 \r
+// Selects the aligned or unaligned blend kernel depending on whether dest and source are both 16-byte aligned.\r
+template<typename temporal>\r
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count)\r
+{      // NOTE(review): `temporal` is not forwarded below; both branches pass temporal_tag - confirm this is intended.\r
+       using namespace xmm;\r
+       if(reinterpret_cast<uintptr_t>(dest) % 16 != 0 || reinterpret_cast<uintptr_t>(source) % 16 != 0) // uintptr_t: casting a pointer to int truncates on 64-bit targets.\r
+               kernel<temporal_tag, unaligned_tag>(dest, source, count);\r
+       else\r
+               kernel<temporal_tag, aligned_tag>(dest, source, count);\r
+}\r
+\r
 class image_renderer\r
 {\r
        std::pair<std::vector<item>, boost::shared_future<boost::iterator_range<const uint8_t*>>>               last_image_;\r
@@ -176,8 +187,18 @@ private:
        {               \r
                BOOST_FOREACH(auto& item, items)\r
                        item.transform.field_mode &= field_mode;\r
-\r
-               boost::remove_erase_if(items, [](item& item){return item.transform.field_mode == core::field_mode::empty;});\r
+               \r
+               // Remove empty items.\r
+               boost::range::remove_erase_if(items, [&](const item& item)\r
+               {\r
+                       return item.transform.field_mode == core::field_mode::empty;\r
+               });\r
+               \r
+               // Remove first field stills.\r
+               boost::range::remove_erase_if(items, [&](const item& item)\r
+               {\r
+                       return item.transform.is_still && item.transform.field_mode == field_mode; // only use the last field for stills.\r
+               });\r
 \r
                if(items.empty())\r
                        return;\r
@@ -194,81 +215,86 @@ private:
                // TODO: Add support for slide transition.\r
                tbb::parallel_for(tbb::blocked_range<int>(0, height/step), [&](const tbb::blocked_range<int>& r)\r
                {\r
-                       for(auto n = r.begin(); n != r.end(); ++n)\r
+                       for(auto i = r.begin(); i != r.end(); ++i)\r
                        {\r
-                               auto y = n*step+start;\r
+                               auto y = i*step+start;\r
 \r
-                               auto it = items.begin();\r
-                               for(; it != items.end()-1; ++it)                        \r
-                                       kernel<xmm::store_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
-\r
-                               kernel<xmm::stream_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
+                               for(std::size_t n = 0; n < items.size()-1; ++n)\r
+                                       kernel<xmm::temporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
+                               \r
+                               std::size_t n = items.size()-1;                         \r
+                               kernel<xmm::nontemporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
                        }\r
+\r
+                       _mm_mfence();\r
                });\r
        }\r
-       \r
-       void convert(std::vector<item>& items, int width, int height)\r
+               \r
+       void convert(std::vector<item>& source_items, int width, int height)\r
        {\r
                std::set<std::vector<spl::shared_ptr<host_buffer>>> buffers;\r
 \r
-               BOOST_FOREACH(auto& item, items)\r
+               BOOST_FOREACH(auto& item, source_items)\r
                        buffers.insert(item.buffers);\r
                \r
-               tbb::parallel_for_each(buffers.begin(), buffers.end(), std::bind(&image_renderer::do_convert, this, std::ref(items), std::placeholders::_1, width, height));                                    \r
-       }\r
+               auto dest_items = source_items;\r
 \r
-       void do_convert(std::vector<item>& items, const std::vector<spl::shared_ptr<host_buffer>>& buffers, int width, int height)\r
-       {               \r
-               auto pix_desc  = std::find_if(items.begin(), items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;\r
+               tbb::parallel_for_each(buffers.begin(), buffers.end(), [&](const std::vector<spl::shared_ptr<host_buffer>>& buffers)\r
+               {                       \r
+                       auto pix_desc = std::find_if(source_items.begin(), source_items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;\r
 \r
-               if(pix_desc.format == core::pixel_format::bgra && \r
-                       pix_desc.planes.at(0).width == width &&\r
-                       pix_desc.planes.at(0).height == height)\r
-                       return;\r
+                       if(pix_desc.format == core::pixel_format::bgra && \r
+                               pix_desc.planes.at(0).width == width &&\r
+                               pix_desc.planes.at(0).height == height)\r
+                               return;\r
 \r
-               auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);\r
+                       auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);\r
                                                                \r
-               int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
+                       int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
                                                \r
-               auto& pool = sws_contexts_[key];\r
+                       auto& pool = sws_contexts_[key];\r
 \r
-               std::shared_ptr<SwsContext> sws_context;\r
-               if(!pool.try_pop(sws_context))\r
-               {\r
-                       double param;\r
-                       sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);\r
-               }\r
+                       std::shared_ptr<SwsContext> sws_context;\r
+                       if(!pool.try_pop(sws_context))\r
+                       {\r
+                               double param;\r
+                               sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);\r
+                       }\r
                        \r
-               if(!sws_context)                                \r
-                       BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));                           \r
+                       if(!sws_context)                                \r
+                               BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));                           \r
                \r
-               auto dest = spl::make_shared<host_buffer>(width*height*4);\r
+                       auto dest_frame = spl::make_shared<host_buffer>(width*height*4);\r
 \r
-               {\r
-                       spl::shared_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);      \r
-                       avcodec_get_frame_defaults(av_frame.get());                     \r
-                       avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, width, height);\r
+                       {\r
+                               spl::shared_ptr<AVFrame> dest_av_frame(avcodec_alloc_frame(), av_free); \r
+                               avcodec_get_frame_defaults(dest_av_frame.get());                        \r
+                               avpicture_fill(reinterpret_cast<AVPicture*>(dest_av_frame.get()), dest_frame->data(), PIX_FMT_BGRA, width, height);\r
                                \r
-                       sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, av_frame->data, av_frame->linesize);                            \r
-                       pool.push(sws_context);\r
-               }\r
-                       \r
-               BOOST_FOREACH(auto& item, items)\r
-               {\r
-                       if(item.buffers == buffers)\r
+                               sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, dest_av_frame->data, dest_av_frame->linesize);                          \r
+                               pool.push(sws_context);\r
+                       }\r
+               \r
+                       for(std::size_t n = 0; n < source_items.size(); ++n)\r
                        {\r
-                               item.buffers                    = boost::assign::list_of(dest);\r
-                               item.pix_desc                   = core::pixel_format_desc(core::pixel_format::bgra);\r
-                               item.pix_desc.planes    = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));\r
+                               if(source_items[n].buffers == buffers)\r
+                               {\r
+                                       dest_items[n].buffers                   = boost::assign::list_of(dest_frame);\r
+                                       dest_items[n].pix_desc                  = core::pixel_format_desc(core::pixel_format::bgra);\r
+                                       dest_items[n].pix_desc.planes   = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));\r
+                                       dest_items[n].transform                 = source_items[n].transform;\r
+                               }\r
                        }\r
-               }       \r
+               });     \r
+\r
+               source_items = std::move(dest_items);\r
        }\r
 };\r
                \r
 struct image_mixer::impl : boost::noncopyable\r
 {      \r
        image_renderer                                          renderer_;\r
-       std::vector<core::frame_transform>      transform_stack_;\r
+       std::vector<core::image_transform>      transform_stack_;\r
        std::vector<item>                                       items_; // layer/stream/items\r
 public:\r
        impl() \r
@@ -281,31 +307,30 @@ public:
        {\r
        }\r
                \r
-       void push(core::frame_transform& transform)\r
+       void push(const core::frame_transform& transform)\r
        {\r
-               transform_stack_.push_back(transform_stack_.back()*transform);\r
+               transform_stack_.push_back(transform_stack_.back()*transform.image_transform);\r
        }\r
                \r
-       void visit(core::data_frame& frame2)\r
+       void visit(const core::data_frame& frame2)\r
        {                       \r
-               write_frame* frame = dynamic_cast<write_frame*>(&frame2);\r
+               auto frame = dynamic_cast<const cpu::data_frame*>(&frame2);\r
                if(frame == nullptr)\r
                        return;\r
 \r
-               if(frame->get_pixel_format_desc().format == core::pixel_format::invalid)\r
+               if(frame->pixel_format_desc().format == core::pixel_format::invalid)\r
                        return;\r
 \r
-               if(frame->get_buffers().empty())\r
+               if(frame->buffers().empty())\r
                        return;\r
 \r
                if(transform_stack_.back().field_mode == core::field_mode::empty)\r
                        return;\r
 \r
                item item;\r
-               item.pix_desc                   = frame->get_pixel_format_desc();\r
-               item.buffers                    = frame->get_buffers();                         \r
+               item.pix_desc                   = frame->pixel_format_desc();\r
+               item.buffers                    = frame->buffers();                             \r
                item.transform                  = transform_stack_.back();\r
-               item.transform.volume   = core::frame_transform().volume; // Set volume to default since we don't care about it here.\r
 \r
                items_.push_back(item);\r
        }\r
@@ -324,19 +349,19 @@ public:
                return renderer_(std::move(items_), format_desc);\r
        }\r
        \r
-       virtual spl::shared_ptr<cpu::write_frame> create_frame(const void* tag, const core::pixel_format_desc& desc)\r
+       virtual spl::unique_ptr<core::data_frame> create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode)\r
        {\r
-               return spl::make_shared<cpu::write_frame>(tag, desc);\r
+               return spl::make_unique<cpu::data_frame>(tag, desc, frame_rate, field_mode);\r
        }\r
 };\r
 \r
 image_mixer::image_mixer() : impl_(new impl()){}\r
-void image_mixer::push(core::frame_transform& transform){impl_->push(transform);}\r
-void image_mixer::visit(core::data_frame& frame){impl_->visit(frame);}\r
+void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);}\r
+void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);}\r
 void image_mixer::pop(){impl_->pop();}\r
 boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}\r
 void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}\r
 void image_mixer::end_layer(){impl_->end_layer();}\r
-spl::shared_ptr<core::write_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc) {return impl_->create_frame(tag, desc);}\r
+spl::unique_ptr<core::data_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) {return impl_->create_frame(tag, desc, frame_rate, field_mode);}\r
 \r
 }}}
\ No newline at end of file