git.sesse.net Git - casparcg/commitdiff
2.0.0.2: Refactored key_only implementation.
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sun, 15 May 2011 21:29:05 +0000 (21:29 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Sun, 15 May 2011 21:29:05 +0000 (21:29 +0000)
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.0.0.2@757 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d

common/common.vcxproj
common/common.vcxproj.filters
common/memory/memshfl.h [new file with mode: 0644]
modules/decklink/consumer/decklink_consumer.cpp
shell/caspar.config

index 235708c9de8dab6effe8c3ee2712f6a5c8e99ea2..3f1086751f2e21f8fa276f42ad851cd5167c4203 100644 (file)
     <ClInclude Include="log\log.h" />\r
     <ClInclude Include="memory\memclr.h" />\r
     <ClInclude Include="memory\memcpy.h" />\r
+    <ClInclude Include="memory\memshfl.h" />\r
     <ClInclude Include="memory\page_locked_allocator.h" />\r
     <ClInclude Include="memory\safe_ptr.h" />\r
     <ClInclude Include="env.h" />\r
index b0b0d26adc1002932f6611460f113ebcf75ab45c..bc2e3992dce47f9192aece3b58154ce3cd2e6190 100644 (file)
     <ClInclude Include="concurrency\com_context.h">\r
       <Filter>concurrency</Filter>\r
     </ClInclude>\r
+    <ClInclude Include="memory\memshfl.h">\r
+      <Filter>memory</Filter>\r
+    </ClInclude>\r
   </ItemGroup>\r
 </Project>
\ No newline at end of file
diff --git a/common/memory/memshfl.h b/common/memory/memshfl.h
new file mode 100644 (file)
index 0000000..32b6c0a
--- /dev/null
@@ -0,0 +1,71 @@
+/*\r
+* copyright (c) 2010 Sveriges Television AB <info@casparcg.com>\r
+*\r
+*  This file is part of CasparCG.\r
+*\r
+*    CasparCG is free software: you can redistribute it and/or modify\r
+*    it under the terms of the GNU General Public License as published by\r
+*    the Free Software Foundation, either version 3 of the License, or\r
+*    (at your option) any later version.\r
+*\r
+*    CasparCG is distributed in the hope that it will be useful,\r
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
+*    GNU General Public License for more details.\r
+\r
+*    You should have received a copy of the GNU General Public License\r
+*    along with CasparCG.  If not, see <http://www.gnu.org/licenses/>.\r
+*\r
+*/\r
+#pragma once\r
+\r
+#include <intrin.h>\r
+\r
+#include <assert.h>\r
+\r
+#include <tbb/parallel_for.h>\r
+\r
+namespace caspar {\r
+\r
+namespace internal {\r
+/*\r
+* Single-threaded worker: reads 16-byte blocks from source, passes each one\r
+* through _mm_shuffle_epi8 with one fixed control mask, and writes the\r
+* result to dest.\r
+*\r
+* dest           - destination buffer; written with _mm_stream_si128.\r
+* source         - source buffer; read with _mm_load_si128.\r
+* count          - size in bytes. It is divided by 16 and the loop then runs\r
+*                  count/4 times handling four blocks per pass, so only\r
+*                  whole 64-byte groups are processed; a remainder is left\r
+*                  unwritten.\r
+* m1, m2, m3, m4 - the four 32-bit parts of the shuffle mask, handed to\r
+*                  _mm_set_epi32 in this order.\r
+*\r
+* Returns dest.\r
+*\r
+* NOTE(review): the load/stream intrinsics used here presumably require\r
+* 16-byte aligned pointers; nothing in this function checks that\r
+* (<assert.h> is included by this header but not used) - confirm that\r
+* callers guarantee the alignment.\r
+*/\r
+static void* fast_memsfhl(void* dest, const void* source, size_t count, int m1, int m2, int m3, int m4)\r
+{\r
+       __m128i*           dest128 = reinterpret_cast<__m128i*>(dest);  \r
+       const __m128i* source128 = reinterpret_cast<const __m128i*>(source);\r
+       /* Convert the byte count into a number of 16-byte blocks. */\r
+       count /= 16; // 128 bit\r
+\r
+       __m128i xmm0, xmm1, xmm2, xmm3;\r
+       /* The mask is built once and reused for every block. */\r
+       const __m128i mask128 = _mm_set_epi32(m1, m2, m3, m4);\r
+       for(size_t n = 0; n < count/4; ++n)\r
+       {\r
+               xmm0 = _mm_load_si128(source128++);     \r
+               xmm1 = _mm_load_si128(source128++);     \r
+               xmm2 = _mm_load_si128(source128++);     \r
+               xmm3 = _mm_load_si128(source128++);     \r
+               /* Shuffle each loaded block and write it out with _mm_stream_si128. */\r
+               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm0, mask128));\r
+               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm1, mask128));\r
+               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm2, mask128));\r
+               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm3, mask128));\r
+       }\r
+       return dest;\r
+}\r
+\r
+}\r
+/*\r
+* Parallel front end: treats the buffers as a sequence of 128-byte units and\r
+* runs internal::fast_memsfhl on every sub-range that tbb::parallel_for\r
+* hands to the lambda.\r
+*\r
+* dest, source   - start of the destination / source buffers; each task\r
+*                  offsets both by r.begin()*128 bytes and processes\r
+*                  r.size()*128 bytes.\r
+* count          - size in bytes. The iteration range is [0, count/128), so\r
+*                  a trailing count%128 bytes are never read or written.\r
+* m1, m2, m3, m4 - shuffle mask parts, forwarded unchanged to the worker.\r
+*\r
+* Returns dest.\r
+*\r
+* NOTE(review): the name is spelled "memsfhl" although the header file is\r
+* memshfl.h; the call site in decklink_consumer.cpp uses this spelling, so\r
+* it is left as is.\r
+* NOTE(review): declared static in a header, so presumably every including\r
+* translation unit gets its own copy - confirm this is intended.\r
+*/\r
+static void* fast_memsfhl(void* dest, const void* source, size_t count, int m1, int m2, int m3, int m4)\r
+{   \r
+       tbb::affinity_partitioner ap;\r
+       tbb::parallel_for(tbb::blocked_range<size_t>(0, count/128), [&](const tbb::blocked_range<size_t>& r)\r
+       {       \r
+               internal::fast_memsfhl(reinterpret_cast<char*>(dest) + r.begin()*128, reinterpret_cast<const char*>(source) + r.begin()*128, r.size()*128, m1, m2, m3, m4);   \r
+       }, ap);\r
+\r
+       return dest;\r
+}\r
+\r
+\r
+}
\ No newline at end of file
index a7dfa9001cc6bda6f0df25ac2d9cdf5c393190dd..132890004c2f8aa96410f81daf12ec887f4def5c 100644 (file)
@@ -33,6 +33,7 @@
 #include <common/exception/exceptions.h>\r
 #include <common/memory/memcpy.h>\r
 #include <common/memory/memclr.h>\r
+#include <common/memory/memshfl.h>\r
 \r
 #include <tbb/concurrent_queue.h>\r
 \r
@@ -109,32 +110,11 @@ public:
     STDMETHOD(GetAncillaryData(IDeckLinkVideoFrameAncillary** ancillary))                {return S_FALSE;}\r
 };\r
 \r
-void make_alpha(void* dest, const void* source, size_t count)\r
-{      \r
-       __m128i*           dest128 = reinterpret_cast<__m128i*>(dest);  \r
-       const __m128i* source128 = reinterpret_cast<const __m128i*>(source);\r
-\r
-       count /= 16; // 128 bit\r
-\r
-       __m128i xmm0, xmm1, xmm2, xmm3;\r
-\r
-       const __m128i mask128 = _mm_set_epi8(3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15);\r
-       for(size_t n = 0; n < count/4; ++n)\r
-       {\r
-               xmm0 = _mm_load_si128(source128++);     \r
-               xmm1 = _mm_load_si128(source128++);     \r
-               xmm2 = _mm_load_si128(source128++);     \r
-               xmm3 = _mm_load_si128(source128++);     \r
-\r
-               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm0, mask128));\r
-               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm1, mask128));\r
-               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm2, mask128));\r
-               _mm_stream_si128(dest128++, _mm_shuffle_epi8(xmm3, mask128));\r
-       }\r
-}\r
-\r
 std::shared_ptr<IDeckLinkVideoFrame> make_alpha_only_frame(const CComQIPtr<IDeckLinkOutput>& decklink, const safe_ptr<const core::read_frame>& frame, const core::video_format_desc& format_desc)\r
 {\r
+       if(static_cast<size_t>(frame->image_data().size()) != format_desc.size)\r
+               return std::make_shared<decklink_frame_adapter>(frame, format_desc);\r
+\r
        IDeckLinkMutableVideoFrame* result;\r
 \r
        if(FAILED(decklink->CreateVideoFrame(format_desc.width, format_desc.height, format_desc.size/format_desc.height, bmdFormat8BitBGRA, bmdFrameFlagDefault, &result)))\r
@@ -143,20 +123,8 @@ std::shared_ptr<IDeckLinkVideoFrame> make_alpha_only_frame(const CComQIPtr<IDeck
        void* bytes = nullptr;\r
        if(FAILED(result->GetBytes(&bytes)))\r
                BOOST_THROW_EXCEPTION(caspar_exception());\r
-               \r
-       unsigned char* data = reinterpret_cast<unsigned char*>(bytes);\r
 \r
-       if(static_cast<size_t>(frame->image_data().size()) == format_desc.size)\r
-       {\r
-               size_t count = frame->image_data().size();\r
-               tbb::affinity_partitioner ap;\r
-               tbb::parallel_for(tbb::blocked_range<size_t>(0, count/128), [&](const tbb::blocked_range<size_t>& r)\r
-               {       \r
-                       make_alpha(reinterpret_cast<char*>(data) + r.begin()*128, reinterpret_cast<const char*>(frame->image_data().begin()) + r.begin()*128, r.size()*128);   \r
-               }, ap);\r
-       }\r
-       else\r
-               memset(data, 0, format_desc.size);\r
+       fast_memsfhl(reinterpret_cast<unsigned char*>(bytes), frame->image_data().begin(), frame->image_data().size(), 0x03030303, 0x07070707, 0x0B0B0B0B, 0x0F0F0F0F);\r
 \r
        return std::shared_ptr<IDeckLinkVideoFrame>(result, [](IDeckLinkMutableVideoFrame* p) {p->Release();});\r
 }\r
index 1d0709b76e3b95f51fd2e641fd34f0eb7a1e9af6..d7f3e01b64a1f9f1d5d366c14031715cb8ef968a 100644 (file)
@@ -19,7 +19,7 @@
           <embedded-audio>true</embedded-audio>\r
           <latency>low</latency>\r
           <key>external</key>\r
-          <output>fill_and_key</output>\r
+          <output>key_only</output>\r
         </decklink>\r
         <!--<ogl>\r
           <device>1</device>\r