<ClInclude Include="log\log.h" />\r
<ClInclude Include="memory\memclr.h" />\r
<ClInclude Include="memory\memcpy.h" />\r
+ <ClInclude Include="memory\memshfl.h" />\r
<ClInclude Include="memory\page_locked_allocator.h" />\r
<ClInclude Include="memory\safe_ptr.h" />\r
<ClInclude Include="env.h" />\r
<ClInclude Include="concurrency\com_context.h">\r
<Filter>concurrency</Filter>\r
</ClInclude>\r
+ <ClInclude Include="memory\memshfl.h">\r
+ <Filter>memory</Filter>\r
+ </ClInclude>\r
</ItemGroup>\r
</Project>
\ No newline at end of file
--- /dev/null
+/*\r
+* copyright (c) 2010 Sveriges Television AB <info@casparcg.com>\r
+*\r
+* This file is part of CasparCG.\r
+*\r
+* CasparCG is free software: you can redistribute it and/or modify\r
+* it under the terms of the GNU General Public License as published by\r
+* the Free Software Foundation, either version 3 of the License, or\r
+* (at your option) any later version.\r
+*\r
+* CasparCG is distributed in the hope that it will be useful,\r
+* but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+* GNU General Public License for more details.\r
+\r
+* You should have received a copy of the GNU General Public License\r
+* along with CasparCG. If not, see <http://www.gnu.org/licenses/>.\r
+*\r
+*/\r
+#pragma once\r
+\r
+#include <intrin.h>\r
+\r
+#include <assert.h>\r
+\r
+#include <tbb/parallel_for.h>\r
+\r
+namespace caspar {\r
+\r
+namespace internal {\r
+\r
+/**\r
+ * Serial SSSE3 kernel: copies a buffer while applying the same byte shuffle\r
+ * to every 16-byte vector of it.\r
+ *\r
+ * NOTE: the function name is spelled "memsfhl" (sic) although the header is\r
+ * memshfl.h; the spelling is kept because callers use it.\r
+ *\r
+ * For every 16-byte vector, destination byte i receives the source byte of\r
+ * the same vector selected by the low four bits of mask byte i, or zero when\r
+ * bit 7 of that mask byte is set (semantics of _mm_shuffle_epi8).\r
+ *\r
+ * \param dest        Destination buffer. Must be 16-byte aligned.\r
+ * \param source      Source buffer. Must be 16-byte aligned.\r
+ * \param count       Number of bytes to process. Only whole 16-byte vectors\r
+ *                    are handled; a remainder of count % 16 bytes is neither\r
+ *                    read nor written.\r
+ * \param m1,m2,m3,m4 The shuffle mask as four 32-bit words, passed straight\r
+ *                    to _mm_set_epi32: m1 becomes mask bytes 12..15 and m4\r
+ *                    becomes mask bytes 0..3.\r
+ * \return dest.\r
+ */\r
+static void* fast_memsfhl(void* dest, const void* source, size_t count, int m1, int m2, int m3, int m4)\r
+{\r
+    // The aligned load and the streaming store both fault on addresses that\r
+    // are not 16-byte aligned, so make the precondition explicit in debug builds.\r
+    assert(reinterpret_cast<size_t>(dest) % 16 == 0);\r
+    assert(reinterpret_cast<size_t>(source) % 16 == 0);\r
+\r
+    __m128i* dest128 = reinterpret_cast<__m128i*>(dest);\r
+    const __m128i* source128 = reinterpret_cast<const __m128i*>(source);\r
+\r
+    const size_t vectors = count / 16; // number of whole 128-bit vectors\r
+\r
+    const __m128i mask128 = _mm_set_epi32(m1, m2, m3, m4);\r
+\r
+    // Main loop, unrolled to four vectors (64 bytes) per iteration.\r
+    for(size_t n = 0; n < vectors/4; ++n)\r
+    {\r
+        const __m128i xmm0 = _mm_load_si128(source128++);\r
+        const __m128i xmm1 = _mm_load_si128(source128++);\r
+        const __m128i xmm2 = _mm_load_si128(source128++);\r
+        const __m128i xmm3 = _mm_load_si128(source128++);\r
+\r
+        _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm0, mask128));\r
+        _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm1, mask128));\r
+        _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm2, mask128));\r
+        _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm3, mask128));\r
+    }\r
+\r
+    // Tail: the one to three vectors that do not fill a complete unrolled\r
+    // iteration. Without this they would be left unwritten in dest.\r
+    for(size_t n = 0; n < vectors%4; ++n)\r
+        _mm_stream_si128(dest128++, _mm_shuffle_epi8(_mm_load_si128(source128++), mask128));\r
+\r
+    return dest;\r
+}\r
+\r
+}\r
+\r
+/**\r
+ * Parallel byte shuffle: splits the buffer into 128-byte blocks, distributes\r
+ * ranges of blocks over TBB worker threads and runs internal::fast_memsfhl\r
+ * on each range.\r
+ *\r
+ * Bytes past the last whole 128-byte block are handed to the serial kernel in\r
+ * one extra call after the parallel pass, which processes as much of that\r
+ * remainder as its own granularity allows.\r
+ *\r
+ * \param dest        Destination buffer; forwarded to the serial kernel, which\r
+ *                    uses aligned 128-bit stores.\r
+ * \param source      Source buffer; forwarded to the serial kernel, which uses\r
+ *                    aligned 128-bit loads.\r
+ * \param count       Number of bytes to process.\r
+ * \param m1,m2,m3,m4 Shuffle mask words, forwarded unchanged to the kernel.\r
+ * \return dest.\r
+ */\r
+static void* fast_memsfhl(void* dest, const void* source, size_t count, int m1, int m2, int m3, int m4)\r
+{\r
+    const size_t block_size = 128; // bytes handled per unit of the parallel range\r
+\r
+    char* const dest8 = reinterpret_cast<char*>(dest);\r
+    const char* const source8 = reinterpret_cast<const char*>(source);\r
+    const size_t blocks = count / block_size;\r
+\r
+    tbb::affinity_partitioner ap;\r
+    tbb::parallel_for(tbb::blocked_range<size_t>(0, blocks), [&](const tbb::blocked_range<size_t>& r)\r
+    {\r
+        const size_t offset = r.begin()*block_size;\r
+        internal::fast_memsfhl(dest8 + offset, source8 + offset, r.size()*block_size, m1, m2, m3, m4);\r
+    }, ap);\r
+\r
+    // The parallel range only covers whole blocks; pass on whatever is left so\r
+    // the end of the buffer is not silently skipped. The offset is a multiple\r
+    // of 128, so the tail keeps the alignment of the buffer start.\r
+    const size_t done = blocks*block_size;\r
+    if(done < count)\r
+        internal::fast_memsfhl(dest8 + done, source8 + done, count - done, m1, m2, m3, m4);\r
+\r
+    return dest;\r
+}\r
+\r
+\r
+}
\ No newline at end of file
#include <common/exception/exceptions.h>\r
#include <common/memory/memcpy.h>\r
#include <common/memory/memclr.h>\r
+#include <common/memory/memshfl.h>\r
\r
#include <tbb/concurrent_queue.h>\r
\r
STDMETHOD(GetAncillaryData(IDeckLinkVideoFrameAncillary** ancillary)) {return S_FALSE;}\r
};\r
\r
-void make_alpha(void* dest, const void* source, size_t count)\r
-{ \r
- __m128i* dest128 = reinterpret_cast<__m128i*>(dest); \r
- const __m128i* source128 = reinterpret_cast<const __m128i*>(source);\r
-\r
- count /= 16; // 128 bit\r
-\r
- __m128i xmm0, xmm1, xmm2, xmm3;\r
-\r
- const __m128i mask128 = _mm_set_epi8(3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15);\r
- for(size_t n = 0; n < count/4; ++n)\r
- {\r
- xmm0 = _mm_load_si128(source128++); \r
- xmm1 = _mm_load_si128(source128++); \r
- xmm2 = _mm_load_si128(source128++); \r
- xmm3 = _mm_load_si128(source128++); \r
-\r
- _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm0, mask128));\r
- _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm1, mask128));\r
- _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm2, mask128));\r
- _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm3, mask128));\r
- }\r
-}\r
-\r
std::shared_ptr<IDeckLinkVideoFrame> make_alpha_only_frame(const CComQIPtr<IDeckLinkOutput>& decklink, const safe_ptr<const core::read_frame>& frame, const core::video_format_desc& format_desc)\r
{\r
+ if(static_cast<size_t>(frame->image_data().size()) != format_desc.size)\r
+ return std::make_shared<decklink_frame_adapter>(frame, format_desc);\r
+\r
IDeckLinkMutableVideoFrame* result;\r
\r
if(FAILED(decklink->CreateVideoFrame(format_desc.width, format_desc.height, format_desc.size/format_desc.height, bmdFormat8BitBGRA, bmdFrameFlagDefault, &result)))\r
void* bytes = nullptr;\r
if(FAILED(result->GetBytes(&bytes)))\r
BOOST_THROW_EXCEPTION(caspar_exception());\r
- \r
- unsigned char* data = reinterpret_cast<unsigned char*>(bytes);\r
\r
- if(static_cast<size_t>(frame->image_data().size()) == format_desc.size)\r
- {\r
- size_t count = frame->image_data().size();\r
- tbb::affinity_partitioner ap;\r
- tbb::parallel_for(tbb::blocked_range<size_t>(0, count/128), [&](const tbb::blocked_range<size_t>& r)\r
- { \r
- make_alpha(reinterpret_cast<char*>(data) + r.begin()*128, reinterpret_cast<const char*>(frame->image_data().begin()) + r.begin()*128, r.size()*128); \r
- }, ap);\r
- }\r
- else\r
- memset(data, 0, format_desc.size);\r
+ fast_memsfhl(reinterpret_cast<unsigned char*>(bytes), frame->image_data().begin(), frame->image_data().size(), 0x03030303, 0x07070707, 0x0B0B0B0B, 0x0F0F0F0F);\r
\r
return std::shared_ptr<IDeckLinkVideoFrame>(result, [](IDeckLinkMutableVideoFrame* p) {p->Release();});\r
}\r
<embedded-audio>true</embedded-audio>\r
<latency>low</latency>\r
<key>external</key>\r
- <output>fill_and_key</output>\r
+ <output>key_only</output>\r
</decklink>\r
<!--<ogl>\r
<device>1</device>\r