\r
for(auto n = 0; n < count; n += 32) \r
{\r
- auto s0 = s8_x::load<xmm::temporal_tag, alignment>(dest+n+0);\r
- auto s1 = s8_x::load<xmm::temporal_tag, alignment>(dest+n+16);\r
+ auto s0 = s8_x::load<temporal_tag, alignment>(dest+n+0);\r
+ auto s1 = s8_x::load<temporal_tag, alignment>(dest+n+16);\r
\r
- auto d0 = s8_x::load<xmm::temporal_tag, alignment>(source+n+0);\r
- auto d1 = s8_x::load<xmm::temporal_tag, alignment>(source+n+16);\r
+ auto d0 = s8_x::load<temporal_tag, alignment>(source+n+0);\r
+ auto d1 = s8_x::load<temporal_tag, alignment>(source+n+16);\r
\r
auto argb0 = blend(d0, s0);\r
auto argb1 = blend(d1, s1);\r
s8_x::store<temporal, alignment>(argb0, dest+n+0 );\r
s8_x::store<temporal, alignment>(argb1, dest+n+16);\r
} \r
+}\r
\r
- _mm_mfence();\r
+// Alignment-dispatching entry point: forwards to the two-parameter kernel
+// overload, selecting aligned_tag only when both dest and source are
+// 16-byte aligned and unaligned_tag otherwise.
+// NOTE(review): the `temporal` template argument is not forwarded; both
+// branches always instantiate the temporal_tag variant - confirm this is intended.
+template<typename temporal>
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count)
+{
+ using namespace xmm;
+
+ // Inspect the addresses through uintptr_t: casting a pointer to int
+ // truncates it (and is ill-formed) where pointers are wider than int.
+ const bool aligned = reinterpret_cast<uintptr_t>(dest) % 16 == 0 && reinterpret_cast<uintptr_t>(source) % 16 == 0;
+
+ if(aligned)
+ kernel<temporal_tag, aligned_tag>(dest, source, count);
+ else
+ kernel<temporal_tag, unaligned_tag>(dest, source, count);
}
\r
class image_renderer\r
// TODO: Add support for slide transition.\r
tbb::parallel_for(tbb::blocked_range<int>(0, height/step), [&](const tbb::blocked_range<int>& r)\r
{\r
- for(auto n = r.begin(); n != r.end(); ++n)\r
+ for(auto i = r.begin(); i != r.end(); ++i)\r
{\r
- auto y = n*step+start;\r
-\r
- auto it = items.begin();\r
- for(; it != items.end()-1; ++it) \r
- kernel<xmm::temporal_tag, xmm::aligned_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4);\r
+ auto y = i*step+start;\r
\r
- kernel<xmm::nontemporal_tag, xmm::aligned_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4);\r
+ for(std::size_t n = 0; n < items.size()-1; ++n)\r
+ kernel<xmm::temporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
+ \r
+ std::size_t n = items.size()-1; \r
+ kernel<xmm::nontemporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
}\r
+\r
+ _mm_mfence();\r
});\r
}\r
- \r
- void convert(std::vector<item>& items, int width, int height)\r
+ \r
+ // Converts every item that is not already BGRA at width x height into a
+ // single-plane BGRA host buffer of that size using libswscale.
+ // Each distinct buffer set is converted once, in parallel, and the result
+ // is assigned to every item that referenced that set. Results are written
+ // to a copy of the item list, which replaces source_items at the end.
+ // Throws operation_failed when no scaling context could be created.
+ void convert(std::vector<item>& source_items, int width, int height)
{
std::set<std::vector<spl::shared_ptr<host_buffer>>> buffers;

- BOOST_FOREACH(auto& item, items)
+ BOOST_FOREACH(auto& item, source_items)
buffers.insert(item.buffers);

- tbb::parallel_for_each(buffers.begin(), buffers.end(), std::bind(&image_renderer::do_convert, this, std::ref(items), std::placeholders::_1, width, height));
- }
+ // Tasks read source_items and write dest_items, so the equality tests
+ // below never observe a partially converted item.
+ auto dest_items = source_items;

- void do_convert(std::vector<item>& items, const std::vector<spl::shared_ptr<host_buffer>>& buffers, int width, int height)
- {
- auto pix_desc = std::find_if(items.begin(), items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;
+ tbb::parallel_for_each(buffers.begin(), buffers.end(), [&](const std::vector<spl::shared_ptr<host_buffer>>& buffers)
+ {
+ // Every entry of the set was taken from source_items, so a match exists.
+ auto pix_desc = std::find_if(source_items.begin(), source_items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;

- if(pix_desc.format == core::pixel_format::bgra &&
- pix_desc.planes.at(0).width == width &&
- pix_desc.planes.at(0).height == height)
- return;
+ // Already in the target format and size: nothing to convert.
+ if(pix_desc.format == core::pixel_format::bgra &&
+ pix_desc.planes.at(0).width == width &&
+ pix_desc.planes.at(0).height == height)
+ return;

- auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);
+ auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);

- int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);
+ // Pack width, height and format bits into the key that selects a pool of reusable scaling contexts.
+ int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);

- auto& pool = sws_contexts_[key];
+ auto& pool = sws_contexts_[key];

- std::shared_ptr<SwsContext> sws_context;
- if(!pool.try_pop(sws_context))
- {
- double param;
- sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);
- }
+ std::shared_ptr<SwsContext> sws_context;
+ if(!pool.try_pop(sws_context))
+ {
+ // No tuning parameters are passed: sws_getContext reads param[0] and
+ // param[1] when the pointer is non-null, so the address of a single
+ // uninitialised double must not be handed to it.
+ sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, nullptr), sws_freeContext);
+ }

- if(!sws_context)
- BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));
+ if(!sws_context)
+ BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext"));

- auto dest = spl::make_shared<host_buffer>(width*height*4);
+ // Destination buffer: 4 bytes per pixel.
+ auto dest_frame = spl::make_shared<host_buffer>(width*height*4);

- {
- spl::shared_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free);
- avcodec_get_frame_defaults(av_frame.get());
- avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, width, height);
+ {
+ // Wrap dest_frame's memory in an AVFrame so sws_scale writes straight into it.
+ spl::shared_ptr<AVFrame> dest_av_frame(avcodec_alloc_frame(), av_free);
+ avcodec_get_frame_defaults(dest_av_frame.get());
+ avpicture_fill(reinterpret_cast<AVPicture*>(dest_av_frame.get()), dest_frame->data(), PIX_FMT_BGRA, width, height);

- sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, av_frame->data, av_frame->linesize);
- pool.push(sws_context);
- }
-
- BOOST_FOREACH(auto& item, items)
- {
- if(item.buffers == buffers)
+ sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, dest_av_frame->data, dest_av_frame->linesize);
+ // Return the context to the pool for reuse.
+ pool.push(sws_context);
+ }
+
+ // Point every item that used this buffer set at the converted frame.
+ for(std::size_t n = 0; n < source_items.size(); ++n)
{
- item.buffers = boost::assign::list_of(dest);
- item.pix_desc = core::pixel_format_desc(core::pixel_format::bgra);
- item.pix_desc.planes = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));
+ if(source_items[n].buffers == buffers)
+ {
+ dest_items[n].buffers = boost::assign::list_of(dest_frame);
+ dest_items[n].pix_desc = core::pixel_format_desc(core::pixel_format::bgra);
+ dest_items[n].pix_desc.planes = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));
+ dest_items[n].transform = source_items[n].transform;
+ }
}
- }
+ });
+
+ source_items = std::move(dest_items);
}
};\r
\r