return !(lhs == rhs);\r
}\r
\r
-struct layer\r
-{\r
- std::vector<item> items;\r
-\r
- layer()\r
- {\r
- }\r
-\r
- layer(std::vector<item> items)\r
- : items(std::move(items))\r
- {\r
- }\r
-};\r
-\r
-bool operator==(const layer& lhs, const layer& rhs)\r
-{\r
- return lhs.items == rhs.items;\r
-}\r
-\r
-bool operator!=(const layer& lhs, const layer& rhs)\r
-{\r
- return !(lhs == rhs);\r
-}\r
-\r
class image_renderer\r
{\r
+ // CPU image compositor. operator() returns a future over the composited pixel data and\r
+ // hands back the previous future when it is called again with an equal item list.\r
- std::pair<std::vector<layer>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;\r
- std::map<int, std::shared_ptr<SwsContext>> sws_contexts_;\r
+ std::pair<std::vector<item>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;\r
+ tbb::concurrent_unordered_map<int, tbb::concurrent_bounded_queue<std::shared_ptr<SwsContext>>> sws_contexts_;\r
public: \r
+ // Renders `items` for `format_desc`. If `items` equals the list from the previous call and that\r
+ // call's future already holds a value, the cached future is returned instead of rendering again.\r
- boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)\r
+ boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<item> items, const core::video_format_desc& format_desc)\r
{ \r
- if(last_image_.first == layers && last_image_.second.has_value())\r
+ if(last_image_.first == items && last_image_.second.has_value())\r
return last_image_.second;\r
\r
- auto image = render(layers, format_desc);\r
- last_image_ = std::make_pair(std::move(layers), image);\r
+ auto image = render(items, format_desc);\r
+ last_image_ = std::make_pair(std::move(items), image);\r
return image;\r
}\r
\r
private:\r
+ // Converts the items to BGRA at the format's resolution and blends them into a new buffer.\r
+ // For non-progressive formats the items are drawn twice: once masked to the upper field and\r
+ // once masked to the lower field. The deferred task captures `result` by value.\r
- boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<layer> layers, const core::video_format_desc& format_desc)\r
+ boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<item> items, const core::video_format_desc& format_desc)\r
{\r
- static const auto empty = spl::make_shared<const std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(2048*2048*4, 0);\r
- CASPAR_VERIFY(empty->size() >= format_desc.size);\r
+ convert(items, format_desc.width, format_desc.height); \r
\r
- std::vector<item> items;\r
- BOOST_FOREACH(auto& layer, layers)\r
- items.insert(items.end(), layer.items.begin(), layer.items.end());\r
-\r
- if(items.empty())\r
+ auto result = spl::make_shared<host_buffer>(format_desc.size, 0);\r
+ if(format_desc.field_mode != core::field_mode::progressive)\r
{\r
- return async(launch_policy::deferred, [=]\r
- {\r
- return boost::iterator_range<const uint8_t*>(empty->data(), empty->data() + format_desc.size);\r
- }); \r
+ auto upper = items;\r
+ auto lower = items;\r
+\r
+ BOOST_FOREACH(auto& item, upper)\r
+ item.transform.field_mode &= core::field_mode::upper;\r
+ \r
+ BOOST_FOREACH(auto& item, lower)\r
+ item.transform.field_mode &= core::field_mode::lower;\r
+ \r
+ draw(upper, result->data(), format_desc.width, format_desc.height);\r
+ draw(lower, result->data(), format_desc.width, format_desc.height);\r
+ }\r
+ else\r
+ {\r
+ draw(items, result->data(), format_desc.width, format_desc.height);\r
}\r
-\r
- convert(items.begin(), items.end(), format_desc); \r
- blend(items.begin(), items.end());\r
\r
- auto buffer = items.front().buffers.at(0);\r
return async(launch_policy::deferred, [=]\r
{\r
- return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + format_desc.size);\r
+ return boost::iterator_range<const uint8_t*>(result->data(), result->data() + format_desc.size);\r
}); \r
}\r
\r
+ // Blends each item's first buffer onto `dest`, one row per parallel_for iteration, using\r
+ // 4 bytes per pixel. Items whose field mode is empty are skipped; lower-field items start at\r
+ // row 1, and every non-progressive item touches only every second row.\r
- template<typename I>\r
- void blend(I begin, I end)\r
+ void draw(std::vector<item>& items, uint8_t* dest, int width, int height)\r
{\r
- for(auto it = begin + 1; it != end; ++it)\r
+ BOOST_FOREACH(auto& item, items)\r
{\r
- auto size = begin->buffers.at(0)->size();\r
- auto dest = begin->buffers.at(0)->data();\r
- auto source2 = it->buffers.at(0)->data();\r
- cpu::blend(dest, dest, source2, size);\r
+ auto field_mode = item.transform.field_mode; \r
+\r
+ if(field_mode == core::field_mode::empty)\r
+ continue;\r
+\r
+ auto start = field_mode == core::field_mode::lower ? 1 : 0;\r
+ auto step = field_mode == core::field_mode::progressive ? 1 : 2;\r
+\r
+ auto source2 = item.buffers.at(0)->data();\r
+\r
+ tbb::parallel_for(start, height, step, [&](int y)\r
+ {\r
+ cpu::blend(dest + y*width*4, dest + y*width*4, source2 + y*width*4, width*4);\r
+ });\r
}\r
}\r
\r
+ // Converts, in parallel, every item that is not already BGRA at width x height. Scaling\r
+ // contexts are taken from and returned to a per-key pool so they are not rebuilt for each item.\r
+ // Throws operation_failed when a scaling context cannot be created.\r
- template<typename I>\r
- void convert(I begin, I end, const core::video_format_desc& format_desc)\r
+ void convert(std::vector<item>& items, int width, int height)\r
{\r
- tbb::parallel_for_each(begin, end, [&](item& item)\r
+ tbb::parallel_for_each(items.begin(), items.end(), [&](item& item)\r
{\r
- if(item.pix_desc.format == core::pixel_format::bgra)\r
+ if(item.pix_desc.format == core::pixel_format::bgra && \r
+ item.pix_desc.planes.at(0).width == width &&\r
+ item.pix_desc.planes.at(0).height == height)\r
return;\r
\r
auto input_av_frame = ffmpeg::make_av_frame(item.buffers, item.pix_desc);\r
\r
+ // NOTE(review): the masks below discard the low 8 bits of height and the low bit of format,\r
+ // and the destination size is not part of the key, so different conversions can end up\r
+ // sharing a pool - confirm whether that can happen with the inputs used in practice.\r
int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
- \r
- auto& sws_context = sws_contexts_[key];\r
- if(!sws_context)\r
+ \r
+ auto& pool = sws_contexts_[key];\r
+\r
+ std::shared_ptr<SwsContext> sws_context;\r
+ if(!pool.try_pop(sws_context))\r
{\r
- double param;\r
- sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), format_desc.width, format_desc.height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, &param), sws_freeContext);\r
+ // No scaler tuning values are supplied: pass nullptr instead of the address of an uninitialized double.\r
+ sws_context.reset(sws_getContext(input_av_frame->width, input_av_frame->height, static_cast<PixelFormat>(input_av_frame->format), width, height, PIX_FMT_BGRA, SWS_BILINEAR, nullptr, nullptr, nullptr), sws_freeContext);\r
}\r
\r
if(!sws_context) \r
BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext")); \r
\r
- auto dest = spl::make_shared<host_buffer>(format_desc.size);\r
+ auto dest = spl::make_shared<host_buffer>(width*height*4);\r
\r
spl::shared_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free); \r
avcodec_get_frame_defaults(av_frame.get()); \r
- avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, format_desc.width, format_desc.height);\r
+ avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, width, height);\r
\r
sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, av_frame->data, av_frame->linesize); \r
\r
+ // Scaling is finished, so drop the source planes: draw() blends buffers.at(0), which must be\r
+ // the converted BGRA buffer rather than the first plane of the original frame.\r
+ item.buffers.clear();\r
item.buffers.push_back(dest);\r
item.pix_desc = core::pixel_format_desc(core::pixel_format::bgra);\r
item.pix_desc.planes.clear();\r
- item.pix_desc.planes.push_back(core::pixel_format_desc::plane(format_desc.width, format_desc.height, 4));\r
+ item.pix_desc.planes.push_back(core::pixel_format_desc::plane(width, height, 4));\r
+\r
+ pool.push(sws_context);\r
});\r
}\r
};\r
{ \r
image_renderer renderer_;\r
std::vector<core::frame_transform> transform_stack_;\r
- std::vector<layer> layers_; // layer/stream/items\r
+ std::vector<item> items_; // layer/stream/items\r
public:\r
impl() \r
: transform_stack_(1) \r
\r
void begin_layer(core::blend_mode blend_mode)\r
{\r
- layers_.push_back(layer(std::vector<item>()));\r
}\r
\r
void push(core::frame_transform& transform)\r
item.transform = transform_stack_.back();\r
item.transform.volume = core::frame_transform().volume; // Set volume to default since we don't care about it here.\r
\r
- layers_.back().items.push_back(item);\r
+ items_.push_back(item);\r
}\r
\r
void pop()\r
\r
boost::shared_future<boost::iterator_range<const uint8_t*>> render(const core::video_format_desc& format_desc)\r
{\r
- // Remove empty layers.\r
- boost::range::remove_erase_if(layers_, [](const layer& layer)\r
- {\r
- return layer.items.empty();\r
- });\r
-\r
- return renderer_(std::move(layers_), format_desc);\r
+ return renderer_(std::move(items_), format_desc); /* Moves the items collected by push() into the renderer and returns the future it produces for format_desc. */\r
}\r
\r
virtual spl::shared_ptr<cpu::write_frame> create_frame(const void* tag, const core::pixel_format_desc& desc)\r
\r
#include "simd.h"\r
\r
-#include <tbb/parallel_for.h>\r
-\r
#include <intrin.h>\r
#include <stdint.h>\r
\r
const xmm_epi16 round = 128;\r
const xmm_epi16 lomask = 0x00FF;\r
\r
- tbb::parallel_for(tbb::blocked_range<size_t>(0, count/sizeof(xmm_epi8)), [&](const tbb::blocked_range<size_t>& r)\r
+ for(auto n = 0; n < count; n += 16) \r
{\r
- for(auto n = r.begin(); n != r.end(); ++n) \r
- {\r
- auto s = xmm_epi16::load(source1+n*16);\r
- auto d = xmm_epi16::load(source2+n*16);\r
+ auto s = xmm_epi16::load(source1+n);\r
+ auto d = xmm_epi16::load(source2+n);\r
\r
- // T(S, D) = S * D[A] + 0x80\r
- auto xxxa = xmm_cast<xmm_epi32>(d) >> 24;\r
- auto xaxa = xmm_cast<xmm_epi16>((xxxa << 16) | xxxa);\r
+ // T(S, D) = S * D[A] + 0x80\r
+ auto xxxa = xmm_cast<xmm_epi32>(d) >> 24;\r
+ auto xaxa = xmm_cast<xmm_epi16>((xxxa << 16) | xxxa);\r
\r
- auto xbxr = s & lomask;\r
- auto t1 = xmm_epi16::multiply_low(xbxr, xaxa) + round; \r
+ auto xbxr = s & lomask;\r
+ auto t1 = xmm_epi16::multiply_low(xbxr, xaxa) + round; \r
\r
- auto xaxg = s >> 8;\r
- auto t2 = xmm_epi16::multiply_low(xaxg, xaxa) + round;\r
+ auto xaxg = s >> 8;\r
+ auto t2 = xmm_epi16::multiply_low(xaxg, xaxa) + round;\r
\r
- // C(S, D) = S + D - (((T >> 8) + T) >> 8);\r
- auto bxrx = (t1 >> 8) + t1; \r
- auto axgx = (t2 >> 8) + t2; \r
- auto bgra = xmm_cast<xmm_epi8>((bxrx >> 8) | xmm_epi16::and_not(axgx, lomask));\r
+ // C(S, D) = S + D - (((T >> 8) + T) >> 8);\r
+ auto bxrx = (t1 >> 8) + t1; \r
+ auto axgx = (t2 >> 8) + t2; \r
+ auto bgra = xmm_cast<xmm_epi8>((bxrx >> 8) | xmm_epi16::and_not(axgx, lomask));\r
\r
- xmm_epi8::stream(xmm_cast<xmm_epi8>(s) + (xmm_cast<xmm_epi8>(d) - bgra), dest + n*16);\r
- } \r
- });\r
+ xmm_epi8::stream(xmm_cast<xmm_epi8>(s) + (xmm_cast<xmm_epi8>(d) - bgra), dest+n);\r
+ } \r
}\r
\r
}}}
\ No newline at end of file