// Output modes a consumer configuration can select.
enum output_pixels
{
fill_and_key, // any mode other than key_only wraps the frame in decklink_frame_adapter
- fill_only,
key_only // frame is built by make_alpha_only_frame
};
\r
STDMETHOD(GetAncillaryData(IDeckLinkVideoFrameAncillary** ancillary)) {return S_FALSE;}\r
};\r
\r
-std::shared_ptr<IDeckLinkVideoFrame> make_alpha_only_frame(const CComQIPtr<IDeckLinkOutput>& decklink, const safe_ptr<const core::read_frame>& frame, const core::video_format_desc& format_desc)\r
-{\r
- IDeckLinkMutableVideoFrame* result;\r
+// Writes, for every 4-byte pixel in `source`, that pixel's fourth byte (offset 3)
+// into all four bytes of the corresponding pixel in `dest`.
+//
+// dest   - destination buffer; written with non-temporal 128-bit stores, so it
+//          must be 16-byte aligned.
+// source - source buffer; read with aligned 128-bit loads, so it must be
+//          16-byte aligned.
+// count  - size in bytes; only whole 64-byte groups (16 pixels) are processed,
+//          any remainder is left untouched.
+void make_alpha(void* dest, const void* source, size_t count)
+{ 
+ __m128i* dest128 = reinterpret_cast<__m128i*>(dest); 
+ const __m128i* source128 = reinterpret_cast<const __m128i*>(source);

- if(FAILED(decklink->CreateVideoFrame(format_desc.width, format_desc.height, format_desc.size/format_desc.height, bmdFormat8BitBGRA, bmdFrameFlagDefault, &result)))
- BOOST_THROW_EXCEPTION(caspar_exception());
+ count /= 16; // 128 bit

- void* bytes = nullptr;
- if(FAILED(result->GetBytes(&bytes)))
- BOOST_THROW_EXCEPTION(caspar_exception());
- 
- unsigned char* data = reinterpret_cast<unsigned char*>(bytes);
+ __m128i xmm0, xmm1, xmm2, xmm3;

- if(static_cast<size_t>(frame->image_data().size()) == format_desc.size)
+ // _mm_set_epi8 takes its arguments from the most significant byte (15) down to
+ // the least significant byte (0). The source indices therefore have to be listed
+ // in descending order so that output pixel k reads the alpha of input pixel k;
+ // ascending order would reverse every group of four pixels.
+ const __m128i mask128 = _mm_set_epi8(15, 15, 15, 15, 11, 11, 11, 11, 7, 7, 7, 7, 3, 3, 3, 3);
+ // Each iteration handles four 128-bit registers, i.e. 64 bytes / 16 pixels.
+ for(size_t n = 0; n < count/4; ++n)
{
- tbb::parallel_for(tbb::blocked_range<int>(0, frame->image_data().size()/4), [&](const tbb::blocked_range<int>& r)
- {
- for(int n = r.begin(); n != r.end(); ++n)
- {
- data[n*4+0] = frame->image_data()[n*4+3];
- data[n*4+1] = frame->image_data()[n*4+3];
- data[n*4+2] = frame->image_data()[n*4+3];
- data[n*4+3] = 255;
- }
- });
+ xmm0 = _mm_load_si128(source128++); 
+ xmm1 = _mm_load_si128(source128++); 
+ xmm2 = _mm_load_si128(source128++); 
+ xmm3 = _mm_load_si128(source128++); 
+
+ _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm0, mask128));
+ _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm1, mask128));
+ _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm2, mask128));
+ _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm3, mask128));
}
- else
- memset(data, 0, format_desc.size);
-
- return std::shared_ptr<IDeckLinkVideoFrame>(result, [](IDeckLinkMutableVideoFrame* p) {p->Release();});
}
\r
-std::shared_ptr<IDeckLinkVideoFrame> make_fill_only_frame(const CComQIPtr<IDeckLinkOutput>& decklink, const safe_ptr<const core::read_frame>& frame, const core::video_format_desc& format_desc)\r
+std::shared_ptr<IDeckLinkVideoFrame> make_alpha_only_frame(const CComQIPtr<IDeckLinkOutput>& decklink, const safe_ptr<const core::read_frame>& frame, const core::video_format_desc& format_desc)\r
{\r
IDeckLinkMutableVideoFrame* result;\r
\r
\r
if(static_cast<size_t>(frame->image_data().size()) == format_desc.size)\r
{\r
- tbb::parallel_for(tbb::blocked_range<int>(0, frame->image_data().size()/4), [&](const tbb::blocked_range<int>& r)\r
- {\r
- for(int n = r.begin(); n != r.end(); ++n)\r
- {\r
- data[n*4+0] = frame->image_data()[n*4+0];\r
- data[n*4+1] = frame->image_data()[n*4+1];\r
- data[n*4+2] = frame->image_data()[n*4+2];\r
- data[n*4+3] = 255;\r
- }\r
- });\r
+ size_t count = frame->image_data().size();\r
+ tbb::affinity_partitioner ap;\r
+ tbb::parallel_for(tbb::blocked_range<size_t>(0, count/128), [&](const tbb::blocked_range<size_t>& r)\r
+ { \r
+ make_alpha(reinterpret_cast<char*>(data) + r.begin()*128, reinterpret_cast<const char*>(frame->image_data().begin()) + r.begin()*128, r.size()*128); \r
+ }, ap);\r
}\r
else\r
memset(data, 0, format_desc.size);\r
std::shared_ptr<IDeckLinkVideoFrame> deck_frame;\r
if(config_.output == key_only)\r
deck_frame = make_alpha_only_frame(output_, frame, format_desc_);\r
- else if(config_.output == fill_only)\r
- deck_frame = make_fill_only_frame(output_, frame, format_desc_);\r
else \r
deck_frame = std::make_shared<decklink_frame_adapter>(frame, format_desc_);\r
\r
config.latency = low_latency;\r
\r
auto output_str = ptree.get("output", "fill_and_key");\r
- if(output_str == "fill_only")\r
- config.output = fill_only;\r
- else if(output_str == "key_only")\r
+ if(output_str == "key_only")\r
config.output = key_only;\r
\r
config.device_index = ptree.get("device", 0);\r