\r
#include "image_mixer.h"\r
\r
-#include "../util/write_frame.h"\r
+#include "../util/data_frame.h"\r
#include "../util/xmm.h"\r
\r
#include <common/assert.h>\r
#include <common/concurrency/async.h>\r
#include <common/memory/memcpy.h>\r
\r
-#include <core/frame/write_frame.h>\r
+#include <core/frame/data_frame.h>\r
#include <core/frame/frame_transform.h>\r
#include <core/frame/pixel_format.h>\r
#include <core/video_format.h>\r
#include <boost/range/algorithm_ext/erase.hpp>\r
#include <boost/thread/future.hpp>\r
\r
-#include <intrin.h>\r
-#include <stdint.h>\r
#include <algorithm>\r
+#include <stdint.h>\r
#include <vector>\r
\r
#if defined(_MSC_VER)\r
{\r
core::pixel_format_desc pix_desc;\r
std::vector<spl::shared_ptr<host_buffer>> buffers;\r
- core::frame_transform transform;\r
+ core::image_transform transform;\r
\r
item()\r
: pix_desc(core::pixel_format::invalid)\r
return s8_x(s) + (d - argb);\r
}\r
\r
-template<typename write_tag>\r
-static void kernel(uint8_t* dest, const uint8_t* source, size_t count, const core::frame_transform& transform)\r
+template<typename temporal, typename alignment> /* temporal: tag type passed to the stores; alignment: tag type passed to both loads and stores */\r
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count) /* Blends source into dest in place, 32 bytes per iteration (offsets +0 and +16). NOTE(review): the loop steps by 32 until n >= count, so count is presumably a multiple of 32 - confirm against callers */\r
{ \r
using namespace xmm;\r
\r
for(auto n = 0; n < count; n += 32) \r
{\r
- auto s0 = s8_x::load(dest+n+0);\r
- auto s1 = s8_x::load(dest+n+16);\r
+ auto s0 = s8_x::load<temporal_tag, alignment>(dest+n+0); /* loads always use temporal_tag; only the stores below use the temporal parameter */\r
+ auto s1 = s8_x::load<temporal_tag, alignment>(dest+n+16);\r
\r
- auto d0 = s8_x::load(source+n+0);\r
- auto d1 = s8_x::load(source+n+16);\r
+ auto d0 = s8_x::load<temporal_tag, alignment>(source+n+0); /* note the naming: s0/s1 come from dest, d0/d1 come from source */\r
+ auto d1 = s8_x::load<temporal_tag, alignment>(source+n+16);\r
\r
auto argb0 = blend(d0, s0);\r
auto argb1 = blend(d1, s1);\r
\r
- s8_x::write(argb0, dest+n+0 , write_tag());\r
- s8_x::write(argb1, dest+n+16, write_tag());\r
+ s8_x::store<temporal, alignment>(argb0, dest+n+0 ); /* the blended result overwrites the bytes that were read from dest */\r
+ s8_x::store<temporal, alignment>(argb1, dest+n+16);\r
} \r
}\r
\r
+// Alignment dispatcher for the blend kernel: selects the aligned or unaligned\r
+// instantiation of kernel<temporal, alignment> from the addresses of dest and source.\r
+//   temporal - store policy tag requested by the caller (e.g. xmm::nontemporal_tag).\r
+//   dest     - destination bytes, blended in place.\r
+//   source   - bytes blended onto dest.\r
+//   count    - number of bytes to process.\r
+template<typename temporal>\r
+static void kernel(uint8_t* dest, const uint8_t* source, size_t count)\r
+{ \r
+ using namespace xmm;\r
+\r
+ // uintptr_t rather than int: converting a pointer to int truncates the address and is ill-formed when pointers are wider than int.\r
+ const bool aligned = reinterpret_cast<uintptr_t>(dest) % 16 == 0 && reinterpret_cast<uintptr_t>(source) % 16 == 0;\r
+\r
+ if(aligned)\r
+ kernel<temporal, aligned_tag>(dest, source, count); // honor the caller's store policy; previously temporal_tag was hard-coded and the template argument was ignored\r
+ else\r
+ kernel<temporal_tag, unaligned_tag>(dest, source, count); // kept as before. NOTE(review): streaming stores presumably require 16-byte alignment - confirm before forwarding temporal here\r
+}\r
+\r
class image_renderer\r
{\r
std::pair<std::vector<item>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;\r
{ \r
BOOST_FOREACH(auto& item, items)\r
item.transform.field_mode &= field_mode;\r
-\r
- boost::remove_erase_if(items, [](item& item){return item.transform.field_mode == core::field_mode::empty;});\r
+ \r
+ // Remove empty items.\r
+ boost::range::remove_erase_if(items, [&](const item& item)\r
+ {\r
+ return item.transform.field_mode == core::field_mode::empty;\r
+ });\r
+ \r
+ // Remove first field stills.\r
+ boost::range::remove_erase_if(items, [&](const item& item)\r
+ {\r
+ return item.transform.is_still && item.transform.field_mode == field_mode; // only use the last field for stills.\r
+ });\r
\r
if(items.empty())\r
return;\r
-\r
- static const int CACHE_SIZE = 16384;\r
-\r
+ \r
auto start = field_mode == core::field_mode::lower ? 1 : 0;\r
auto step = field_mode == core::field_mode::progressive ? 1 : 2;\r
\r
// TODO: Add support for push transition.\r
// TODO: Add support for wipe transition.\r
// TODO: Add support for slide transition.\r
- tbb::parallel_for(tbb::blocked_range<int>(0, height/step, CACHE_SIZE/(width*4)), [&](const tbb::blocked_range<int>& r)\r
+ tbb::parallel_for(tbb::blocked_range<int>(0, height/step), [&](const tbb::blocked_range<int>& r)\r
{\r
- for(auto n = r.begin(); n != r.end(); ++n)\r
+ for(auto i = r.begin(); i != r.end(); ++i)\r
{\r
- auto y = n*step+start;\r
-\r
- auto it = items.begin();\r
- for(; it != items.end()-1; ++it) \r
- kernel<xmm::store_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
+ auto y = i*step+start;\r
\r
- kernel<xmm::stream_tag>(dest + y*width*4, it->buffers.at(0)->data() + y*width*4, width*4, it->transform);\r
+ for(std::size_t n = 0; n < items.size()-1; ++n)\r
+ kernel<xmm::temporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
+ \r
+ std::size_t n = items.size()-1; \r
+ kernel<xmm::nontemporal_tag>(dest + y*width*4, items[n].buffers.at(0)->data() + y*width*4, width*4);\r
}\r
+\r
+ _mm_mfence();\r
});\r
}\r
- \r
- void convert(std::vector<item>& items, int width, int height)\r
+ \r
+ void convert(std::vector<item>& source_items, int width, int height)\r
{\r
- // TODO: Don't convert buffers multiple times just because they are in different items due to e.g. interlacing.\r
- tbb::parallel_for_each(items.begin(), items.end(), [&](item& item)\r
- {\r
- if(item.pix_desc.format == core::pixel_format::bgra && \r
- item.pix_desc.planes.at(0).width == width &&\r
- item.pix_desc.planes.at(0).height == height)\r
+ std::set<std::vector<spl::shared_ptr<host_buffer>>> buffers;\r
+\r
+ BOOST_FOREACH(auto& item, source_items)\r
+ buffers.insert(item.buffers);\r
+ \r
+ auto dest_items = source_items;\r
+\r
+ tbb::parallel_for_each(buffers.begin(), buffers.end(), [&](const std::vector<spl::shared_ptr<host_buffer>>& buffers)\r
+ { \r
+ auto pix_desc = std::find_if(source_items.begin(), source_items.end(), [&](const item& item){return item.buffers == buffers;})->pix_desc;\r
+\r
+ if(pix_desc.format == core::pixel_format::bgra && \r
+ pix_desc.planes.at(0).width == width &&\r
+ pix_desc.planes.at(0).height == height)\r
return;\r
\r
- auto input_av_frame = ffmpeg::make_av_frame(item.buffers, item.pix_desc);\r
+ auto input_av_frame = ffmpeg::make_av_frame(buffers, pix_desc);\r
\r
int key = ((input_av_frame->width << 22) & 0xFFC00000) | ((input_av_frame->height << 6) & 0x003FC000) | ((input_av_frame->format << 7) & 0x00007F00);\r
\r
if(!sws_context) \r
BOOST_THROW_EXCEPTION(operation_failed() << msg_info("Could not create software scaling context.") << boost::errinfo_api_function("sws_getContext")); \r
\r
- auto dest = spl::make_shared<host_buffer>(width*height*4);\r
+ auto dest_frame = spl::make_shared<host_buffer>(width*height*4);\r
\r
- spl::shared_ptr<AVFrame> av_frame(avcodec_alloc_frame(), av_free); \r
- avcodec_get_frame_defaults(av_frame.get()); \r
- avpicture_fill(reinterpret_cast<AVPicture*>(av_frame.get()), dest->data(), PIX_FMT_BGRA, width, height);\r
+ {\r
+ spl::shared_ptr<AVFrame> dest_av_frame(avcodec_alloc_frame(), av_free); \r
+ avcodec_get_frame_defaults(dest_av_frame.get()); \r
+ avpicture_fill(reinterpret_cast<AVPicture*>(dest_av_frame.get()), dest_frame->data(), PIX_FMT_BGRA, width, height);\r
\r
- sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, av_frame->data, av_frame->linesize); \r
- \r
- item.buffers.clear();\r
- item.buffers.push_back(dest);\r
- item.pix_desc = core::pixel_format_desc(core::pixel_format::bgra);\r
- item.pix_desc.planes.clear();\r
- item.pix_desc.planes.push_back(core::pixel_format_desc::plane(width, height, 4));\r
+ sws_scale(sws_context.get(), input_av_frame->data, input_av_frame->linesize, 0, input_av_frame->height, dest_av_frame->data, dest_av_frame->linesize); \r
+ pool.push(sws_context);\r
+ }\r
+ \r
+ for(std::size_t n = 0; n < source_items.size(); ++n)\r
+ {\r
+ if(source_items[n].buffers == buffers)\r
+ {\r
+ dest_items[n].buffers = boost::assign::list_of(dest_frame);\r
+ dest_items[n].pix_desc = core::pixel_format_desc(core::pixel_format::bgra);\r
+ dest_items[n].pix_desc.planes = boost::assign::list_of(core::pixel_format_desc::plane(width, height, 4));\r
+ dest_items[n].transform = source_items[n].transform;\r
+ }\r
+ }\r
+ }); \r
\r
- pool.push(sws_context);\r
- });\r
+ source_items = std::move(dest_items);\r
}\r
};\r
\r
struct image_mixer::impl : boost::noncopyable\r
{ \r
image_renderer renderer_;\r
- std::vector<core::frame_transform> transform_stack_;\r
+ std::vector<core::image_transform> transform_stack_;\r
std::vector<item> items_; // layer/stream/items\r
public:\r
impl() \r
: transform_stack_(1) \r
{\r
- CASPAR_LOG(info) << L"Initialized CPU Accelerated Image Mixer";\r
+ CASPAR_LOG(info) << L"Initialized Streaming SIMD Extensions Accelerated CPU Image Mixer"; /* logged on construction; transform_stack_ was sized to one element in the initializer above, so back() is valid from the first push()/visit() */\r
}\r
\r
void begin_layer(core::blend_mode blend_mode)\r
{\r
}\r
\r
- void push(core::frame_transform& transform)\r
+ void push(const core::frame_transform& transform) /* Pushes a new entry onto transform_stack_: the current top combined (operator*) with the image part of transform */\r
{\r
- transform_stack_.push_back(transform_stack_.back()*transform);\r
+ transform_stack_.push_back(transform_stack_.back()*transform.image_transform); /* only transform.image_transform is used; the rest of the frame_transform is not stored here */\r
}\r
\r
- void visit(core::data_frame& frame2)\r
+ void visit(const core::data_frame& frame2) /* Appends frame2 to items_ together with the transform on top of transform_stack_; returns without queuing when any check below fails */\r
{ \r
- write_frame* frame = dynamic_cast<write_frame*>(&frame2);\r
+ auto frame = dynamic_cast<const cpu::data_frame*>(&frame2); /* yields nullptr for frames that are not cpu::data_frame; those are skipped */\r
if(frame == nullptr)\r
return;\r
\r
- if(frame->get_pixel_format_desc().format == core::pixel_format::invalid)\r
+ if(frame->pixel_format_desc().format == core::pixel_format::invalid) /* skip frames whose pixel format is invalid */\r
return;\r
\r
- if(frame->get_buffers().empty())\r
+ if(frame->buffers().empty()) /* skip frames that carry no buffers */\r
return;\r
\r
if(transform_stack_.back().field_mode == core::field_mode::empty)\r
return;\r
\r
item item;\r
- item.pix_desc = frame->get_pixel_format_desc();\r
- item.buffers = frame->get_buffers(); \r
+ item.pix_desc = frame->pixel_format_desc(); /* the item takes the frame's format description and buffer list */\r
+ item.buffers = frame->buffers(); \r
item.transform = transform_stack_.back();\r
- item.transform.volume = core::frame_transform().volume; // Set volume to default since we don't care about it here.\r
\r
items_.push_back(item);\r
}\r
return renderer_(std::move(items_), format_desc);\r
}\r
\r
- virtual spl::shared_ptr<cpu::write_frame> create_frame(const void* tag, const core::pixel_format_desc& desc)\r
+ virtual spl::unique_ptr<core::data_frame> create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) /* Constructs a cpu::data_frame and returns it through spl::unique_ptr<core::data_frame> */\r
{\r
- return spl::make_shared<cpu::write_frame>(tag, desc);\r
+ return spl::make_unique<cpu::data_frame>(tag, desc, frame_rate, field_mode); /* all four arguments are passed through unchanged */\r
}\r
};\r
\r
image_mixer::image_mixer() : impl_(new impl()){}\r
-void image_mixer::push(core::frame_transform& transform){impl_->push(transform);}\r
-void image_mixer::visit(core::data_frame& frame){impl_->visit(frame);}\r
+void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);} /* forwards to impl::push */\r
+void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);} /* forwards to impl::visit */\r
void image_mixer::pop(){impl_->pop();}\r
boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}\r
void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}\r
void image_mixer::end_layer(){impl_->end_layer();}\r
-spl::shared_ptr<core::write_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc) {return impl_->create_frame(tag, desc);}\r
+spl::unique_ptr<core::data_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) {return impl_->create_frame(tag, desc, frame_rate, field_mode);} /* forwards to impl::create_frame with the same arguments */\r
\r
}}}
\ No newline at end of file