git.sesse.net Git - casparcg/commitdiff
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches...
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Mon, 24 Oct 2011 22:55:25 +0000 (22:55 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Mon, 24 Oct 2011 22:55:25 +0000 (22:55 +0000)
common/memory/memcpy.h
modules/ffmpeg/producer/util.cpp
shell/main.cpp

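diff --git a/common/memory/memcpy.h b/common/memory/memcpy.h
index 89b6584d6fb43c1482bc750642ae498f7f803b9a..bc802b48c726209032d356cd887399512e3a2f6f 100644 (file)
--- a/common/memory/memcpy.h
+++ b/common/memory/memcpy.h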
@@ -28,9 +28,9 @@
 \r
 namespace caspar {\r
 \r
-namespace internal {\r
+namespace detail {\r
 \r
-static void* fast_memcpy(void* dest, const void* source, size_t count)\r
+static void* fast_memcpy_aligned_impl(void* dest, const void* source, size_t count)\r
 {\r
        CASPAR_ASSERT(dest != nullptr);\r
        CASPAR_ASSERT(source != nullptr);\r
@@ -77,52 +77,123 @@ static void* fast_memcpy(void* dest, const void* source, size_t count)
        return dest;\r
 }\r
 \r
-static void* fast_memcpy_small(void* dest, const void* source, size_t count)\r
+\r
+static void* fast_memcpy_unaligned_impl(void* dest, const void* source, size_t count)\r
+{\r
+       CASPAR_ASSERT(dest != nullptr);\r
+       CASPAR_ASSERT(source != nullptr);\r
+\r
+       if(count == 0)\r
+               return dest;\r
+\r
+       __asm   \r
+       {      \r
+               mov esi, source;          \r
+               mov edi, dest;    \r
+               mov ebx, count;     \r
+               shr ebx, 7;\r
+\r
+               cpy:             \r
+                       movdqu xmm0, [esi+00h];       \r
+                       movdqu xmm1, [esi+10h];      \r
+                       movdqu xmm2, [esi+20h];         \r
+                       movdqu xmm3, [esi+30h];   \r
+\r
+                       movdqu [edi+00h], xmm0;\r
+                       movdqu [edi+10h], xmm1;\r
+                       movdqu [edi+20h], xmm2;    \r
+                       movdqu [edi+30h], xmm3;\r
+\r
+                       movdqu xmm4, [esi+40h];\r
+                       movdqu xmm5, [esi+50h];\r
+                       movdqu xmm6, [esi+60h];\r
+                       movdqu xmm7, [esi+70h];  \r
+\r
+                       movdqu [edi+40h], xmm4; \r
+                       movdqu [edi+50h], xmm5;      \r
+                       movdqu [edi+60h], xmm6;    \r
+                       movdqu [edi+70h], xmm7;    \r
+\r
+                       lea edi, [edi+80h];       \r
+                       lea esi, [esi+80h];      \r
+\r
+                       dec ebx;      \r
+               jnz cpy;  \r
+       }   \r
+       return dest;\r
+}\r
+\r
+static void* fast_memcpy_small_aligned(char* dest8, const char* source8, size_t count)\r
 {   \r
        size_t rest = count & 127;\r
        count &= ~127;\r
 \r
-       internal::fast_memcpy(reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(source), count);   \r
-       return memcpy(reinterpret_cast<char*>(dest)+count,  reinterpret_cast<const char*>(source)+count, rest);\r
+       fast_memcpy_aligned_impl(dest8, source8, count);   \r
+\r
+       return memcpy(dest8+count,  source8+count, rest);\r
 }\r
 \r
+static void* fast_memcpy_small_unaligned(char* dest8, const char* source8, size_t count)\r
+{   \r
+       size_t rest = count & 127;\r
+       count &= ~127;\r
+\r
+       fast_memcpy_unaligned_impl(dest8, source8, count);   \r
+\r
+       return memcpy(dest8+count,  source8+count, rest);\r
+}\r
+\r
+static void* fast_memcpy_aligned(void* dest, const void* source, size_t count)\r
+{   \r
+       auto dest8                      = reinterpret_cast<char*>(dest);\r
+       auto source8            = reinterpret_cast<const char*>(source);\r
+               \r
+       size_t rest = count & 2047;\r
+       count &= ~2047;\r
+\r
+       Concurrency::parallel_for<size_t>(0, count / 2048, [&](size_t n)\r
+       {       \r
+               detail::fast_memcpy_aligned_impl(dest8 + n*2048, source8 + n*2048, 2048);   \r
+       });\r
+\r
+       return detail::fast_memcpy_small_aligned(dest8+count, source8+count, rest);\r
 }\r
 \r
-static void* fast_memcpy(void* dest, const void* source, size_t count)\r
+static void* fast_memcpy_unaligned(void* dest, const void* source, size_t count)\r
 {   \r
-       if((reinterpret_cast<int>(source) & 15) || (reinterpret_cast<int>(dest) & 15) || count < 128)\r
-               return memcpy(reinterpret_cast<char*>(dest),  reinterpret_cast<const char*>(source), count);\r
-       \r
-       size_t rest = count & 511;\r
-       count &= ~511;\r
+       auto dest8                      = reinterpret_cast<char*>(dest);\r
+       auto source8            = reinterpret_cast<const char*>(source);\r
+               \r
+       size_t rest = count & 2047;\r
+       count &= ~2047;\r
 \r
-       Concurrency::parallel_for<int>(0, count / 512, [&](size_t n)\r
+       Concurrency::parallel_for<size_t>(0, count / 2048, [&](size_t n)\r
        {       \r
-               internal::fast_memcpy(reinterpret_cast<char*>(dest) + n*512, reinterpret_cast<const char*>(source) + n*512, 512);   \r
+               detail::fast_memcpy_unaligned_impl(dest8 + n*2048, source8 + n*2048, 2048);   \r
        });\r
 \r
-       return internal::fast_memcpy_small(reinterpret_cast<char*>(dest)+count,  reinterpret_cast<const char*>(source)+count, rest);\r
+       return detail::fast_memcpy_small_unaligned(dest8+count, source8+count, rest);\r
+}\r
+\r
 }\r
 \r
 template<typename T>\r
-static safe_ptr<T> fast_memdup(const T* source, size_t count)\r
+T* fast_memcpy(T* dest, const void* source, size_t count)\r
+{   \r
+       if((reinterpret_cast<int>(source) & 15) || (reinterpret_cast<int>(dest) & 15))\r
+               return reinterpret_cast<T*>(detail::fast_memcpy_unaligned(dest, source, count));\r
+       else\r
+               return reinterpret_cast<T*>(detail::fast_memcpy_aligned(dest, source, count));\r
+}\r
+\r
+template<typename T>\r
+safe_ptr<T> fast_memdup(const T* source, size_t count)\r
 {      \r
-       auto dest                       = reinterpret_cast<T*>(scalable_aligned_malloc(count + 16, 32));\r
-       auto dest8                      = reinterpret_cast<char*>(dest);\r
-       auto source8            = reinterpret_cast<const char*>(source);        \r
        auto source_align       = reinterpret_cast<int>(source) & 15;\r
-               \r
-       try\r
-       {\r
-               fast_memcpy(dest8, source8-source_align, count+source_align);\r
-       }\r
-       catch(...)\r
-       {\r
-               scalable_free(dest);\r
-               throw;\r
-       }\r
-\r
-       return safe_ptr<T>(reinterpret_cast<T*>(dest8+source_align), [dest](T*){scalable_free(dest);});\r
+       auto dest8                      = reinterpret_cast<char*>(scalable_aligned_malloc(count + source_align, 32));\r
+       auto source8            = reinterpret_cast<const char*>(source);        \r
+       detail::fast_memcpy_aligned(dest8, source8-source_align, count+source_align);\r
+       return safe_ptr<T>(reinterpret_cast<T*>(dest8+source_align), [dest8](T*){scalable_free(dest8);});\r
 }\r
 \r
 \r
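diff --git a/modules/ffmpeg/producer/util.cpp b/modules/ffmpeg/producer/util.cpp
index bd61a8474029677dee0e9705e4e0dfe577f72383..e35ae7c45bfa9621c7b7cf51d6ce21c22658496c 100644 (file)
--- a/modules/ffmpeg/producer/util.cpp
+++ b/modules/ffmpeg/producer/util.cpp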
@@ -193,11 +193,18 @@ safe_ptr<core::write_frame> make_write_frame(const void* tag, const safe_ptr<AVF
                        auto decoded          = decoded_frame->data[n];\r
                        auto decoded_linesize = decoded_frame->linesize[n];\r
                                \r
-                       // Copy line by line since ffmpeg sometimes pads each line.\r
-                       Concurrency::parallel_for(0, static_cast<int>(desc.planes[n].height), [&](size_t y)\r
+                       if(decoded_linesize != static_cast<int>(plane.width))\r
                        {\r
-                               fast_memcpy(result + y*plane.linesize, decoded + y*decoded_linesize, plane.linesize);\r
-                       });\r
+                               // Copy line by line since ffmpeg sometimes pads each line.\r
+                               Concurrency::parallel_for<size_t>(0, desc.planes[n].height, [&](size_t y)\r
+                               {\r
+                                       fast_memcpy(result + y*plane.linesize, decoded + y*decoded_linesize, plane.linesize);\r
+                               });\r
+                       }\r
+                       else\r
+                       {\r
+                               fast_memcpy(result, decoded, plane.size);\r
+                       }\r
 \r
                        write->commit(n);\r
                }\r
diff --git a/shell/main.cpp b/shell/main.cpp
index aaf828059415ef32a75c20740fe03a6a3ab1f8e7..d699eb85e9dc5e9248c35890a1b35559062fd77c 100644 (file)
--- a/shell/main.cpp
+++ b/shell/main.cpp
@@ -296,7 +296,7 @@ int main(int argc, wchar_t* argv[])
        }       \r
        \r
        CASPAR_LOG(info) << "Successfully shutdown CasparCG Server.";\r
-       Sleep(100); // CAPSAR_LOG is asynchronous. Try to get text in correct order.\r
+       Sleep(500); // CAPSAR_LOG is asynchronous. Try to get text in correct order.\r
        system("pause");\r
        return 0;\r
 }
\ No newline at end of file