git.sesse.net Git - casparcg/commitdiff
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches...
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 11 Feb 2012 13:28:22 +0000 (13:28 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sat, 11 Feb 2012 13:28:22 +0000 (13:28 +0000)
accelerator/cpu/image/image_mixer.cpp
common/tweener.cpp

index 6493101f1201e96c7714f04129f8e448a0c257bf..eb08afc728df941e93b163a12866225964948456 100644 (file)
@@ -121,11 +121,11 @@ static void kernel(uint8_t* dest, const uint8_t* source, size_t count)
 \r
        for(auto n = 0; n < count; n += 32)    \r
        {\r
-               auto s0 = s8_x::load<xmm::temporal_tag, alignment>(dest+n+0);\r
-               auto s1 = s8_x::load<xmm::temporal_tag, alignment>(dest+n+16);\r
+               auto s0 = s8_x::load<temporal_tag, alignment>(dest+n+0);\r
+               auto s1 = s8_x::load<temporal_tag, alignment>(dest+n+16);\r
 \r
-               auto d0 = s8_x::load<xmm::temporal_tag, alignment>(source+n+0);\r
-               auto d1 = s8_x::load<xmm::temporal_tag, alignment>(source+n+16);\r
+               auto d0 = s8_x::load<temporal_tag, alignment>(source+n+0);\r
+               auto d1 = s8_x::load<temporal_tag, alignment>(source+n+16);\r
                \r
                auto argb0 = blend(d0, s0);\r
                auto argb1 = blend(d1, s1);\r
@@ -133,8 +133,17 @@ static void kernel(uint8_t* dest, const uint8_t* source, size_t count)
                s8_x::store<temporal, alignment>(argb0, dest+n+0 );\r
                s8_x::store<temporal, alignment>(argb1, dest+n+16);\r
        } \r
+}\r
 \r
-       _mm_mfence();\r
+template<typename temporal>\r
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count)\r
+{                      \r
+       using namespace xmm;\r
+\r
+       if(reinterpret_cast<int>(dest) % 16 != 0 || reinterpret_cast<int>(source) % 16 != 0)\r
+               kernel<temporal_tag, unaligned_tag>(dest, source, count);\r
+       else\r
+               kernel<temporal_tag, aligned_tag>(dest, source, count);\r
 }\r
 \r
 class image_renderer\r
@@ -196,74 +205,79 @@ private:
                // TODO: Add support for slide transition.\r
                tbb::parallel_for(tbb::blocked_range<int>(0, height/step), [&](const tbb::blocked_range<int>& r)\r
                {\r
-                       for(auto n = r.begin(); n != r.end(); ++n)\r
+                       for(auto i = r.begin(); i != r.end(); ++i)\r
                        {\r
-                               auto y = n*step+start;\r
-\r
-                               auto it = items.begin();\r
-                               for(; it != items.end()-1; ++it)                        \r
-                                       kernel<xmm::temporal_tag, xmm::aligned_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4);\r
+                               auto y = i*step+start;\r
 \r
-                               kernel<xmm::nontemporal_tag, xmm::aligned_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4);\r
+                               for(std::size_t n = 0; n < items.size()-1; ++n)\r
+                                       kernel<xmm::temporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
+                               \r
+                               std::size_t n = items.size()-1;                         \r
+                               kernel<xmm::nontemporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
                        }\r
+\r
+                       _mm_mfence();\r
                });\r
        }\r
-       \r
-       void convert(std::vector<item>& items, int width, int height)\r
+               \r
+       void convert(std::vector<item>& source_items, int width, int height)\r
        {\r
                std::set<std::vector<spl::shared_ptr<host_buffer>>> buffers;\r
 \r
-               BOOST_FOREACH(auto& item, items)\r
+               BOOST_FOREACH(auto& item, source_items)\r
                        buffers.insert(item.buffers);\r
                \r
-               tbb::parallel_for_each(buffers.begin(), buffers.end(), std::bind(&image_renderer::do_convert, this, std::ref(items), std::placeholders::_1, width, height));                                    \r
-       }\r
+               auto dest_items = source_items;\r
 \r
-       void do_convert(std::vector<item>& items, const std::vector<spl::shared_ptr<host_buffer>>& buffers, int width, int height)\r
-       {               \r
-               auto pix_desc  = std::find_if(items.begin(), items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;\r
+               tbb::parallel_for_each(buffers.begin(), buffers.end(), [&](const std::vector<spl::shared_ptr<host_buffer>>& buffers)\r
+               {                       \r
+                       auto pix_desc = std::find_if(source_items.begin(), source_items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;\r
 \r
-               if(pix_desc.format == core::pixel_format::bgra && \r
-                       pix_desc.planes.at(0).width == width &&\r
-                       pix_desc.planes.at(0).height == height)\r
-                       return;\r
+                       if(pix_desc.format == core::pixel_format::bgra && \r
+                               pix_desc.planes.at(0).width == width &&\r
+                               pix_desc.planes.at(0).height == height)\r
+                               return;\r
 \r
-               auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);\r
+                       auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);\r
                                                                \r
-               int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
+                       int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
                                                \r
-               auto& pool = sws_contexts_[key];\r
+                       auto& pool = sws_contexts_[key];\r
 \r
-               std::shared_ptr<SwsContext> sws_context;\r
-               if(!pool.try_pop(sws_context))\r
-               {\r
-                       double param;\r
-                       sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);\r
-               }\r
+                       std::shared_ptr<SwsContext> sws_context;\r
+                       if(!pool.try_pop(sws_context))\r
+                       {\r
+                               double param;\r
+                               sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);\r
+                       }\r
                        \r
-               if(!sws_context)                                \r
-                       BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));                           \r
+                       if(!sws_context)                                \r
+                               BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));                           \r
                \r
-               auto dest = spl::make_shared<host_buffer>(width*height*4);\r
+                       auto dest_frame = spl::make_shared<host_buffer>(width*height*4);\r
 \r
-               {\r
-                       spl::shared_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);      \r
-                       avcodec_get_frame_defaults(av_frame.get());                     \r
-                       avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, width, height);\r
+                       {\r
+                               spl::shared_ptr<AVFrame> dest_av_frame(avcodec_alloc_frame(), av_free); \r
+                               avcodec_get_frame_defaults(dest_av_frame.get());                        \r
+                               avpicture_fill(reinterpret_cast<AVPicture*>(dest_av_frame.get()), dest_frame->data(), PIX_FMT_BGRA, width, height);\r
                                \r
-                       sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, av_frame->data, av_frame->linesize);                            \r
-                       pool.push(sws_context);\r
-               }\r
-                       \r
-               BOOST_FOREACH(auto& item, items)\r
-               {\r
-                       if(item.buffers == buffers)\r
+                               sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, dest_av_frame->data, dest_av_frame->linesize);                          \r
+                               pool.push(sws_context);\r
+                       }\r
+               \r
+                       for(std::size_t n = 0; n < source_items.size(); ++n)\r
                        {\r
-                               item.buffers                    = boost::assign::list_of(dest);\r
-                               item.pix_desc                   = core::pixel_format_desc(core::pixel_format::bgra);\r
-                               item.pix_desc.planes    = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));\r
+                               if(source_items[n].buffers == buffers)\r
+                               {\r
+                                       dest_items[n].buffers                   = boost::assign::list_of(dest_frame);\r
+                                       dest_items[n].pix_desc                  = core::pixel_format_desc(core::pixel_format::bgra);\r
+                                       dest_items[n].pix_desc.planes   = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));\r
+                                       dest_items[n].transform                 = source_items[n].transform;\r
+                               }\r
                        }\r
-               }       \r
+               });     \r
+\r
+               source_items = std::move(dest_items);\r
        }\r
 };\r
                \r
index 2cd0220cd7e922599be48ee19b74a3243437d837..45849e045e72b1d4607bd34bb21a40ba7248ac20 100644 (file)
@@ -449,9 +449,10 @@ tweener_t get_tweener(std::wstring name)
        if(it == tweens.end())\r
                BOOST_THROW_EXCEPTION(invalid_argument() << msg_info("Could not find tween.") << arg_value_info(name));\r
        \r
+       auto tween = it->second;\r
        return [=](double t, double b, double c, double d)\r
        {\r
-               return it->second(t, b, c, d, params);\r
+               return tween(t, b, c, d, params);\r
        };\r
 };\r
 \r