\r
// fast_memcpy (first definition in this fragment).
// Only the precondition checks are visible here; the statements that follow the
// assertions are not shown in this fragment, so the copy logic itself is not
// documented.
// NOTE(review): presumably this is the helper the later fast_memcpy invokes as
// internal::fast_memcpy on 128-byte-multiple ranges - confirm against the full file.
static void* fast_memcpy(void* dest, const void* source, size_t count)
{
// The next line carries a '-' diff marker: the assertion that count is a
// multiple of 128 is the line being removed by this change.
- assert(count % 128 == 0);
// Both pointers are required to be non-null (checked only where assert is active).
assert(dest != nullptr);
assert(source != nullptr);


}
\r
-static void* fast_memcpy(void* dest, const void* source, size_t num)\r
+static void* fast_memcpy(void* dest, const void* source, size_t count)\r
{ \r
+ if((reinterpret_cast<int>(source) & 15) || (reinterpret_cast<int>(dest) & 15))\r
+ return memcpy(dest, source, count);\r
+\r
+ if(count < 2048)\r
+ return memcpy(dest, source, count);\r
+\r
+ size_t rest = count % 128;\r
+ count -= rest;\r
+\r
tbb::affinity_partitioner ap;\r
- tbb::parallel_for(tbb::blocked_range<size_t>(0, num/128), [&](const tbb::blocked_range<size_t>& r)\r
+ tbb::parallel_for(tbb::blocked_range<size_t>(0, count/128), [&](const tbb::blocked_range<size_t>& r)\r
{ \r
internal::fast_memcpy(reinterpret_cast<char*>(dest) + r.begin()*128, reinterpret_cast<const char*>(source) + r.begin()*128, r.size()*128); \r
- }, ap); \r
- return dest;\r
+ }, ap);\r
+\r
+ return memcpy(reinterpret_cast<char*>(dest)+count, reinterpret_cast<const char*>(source)+count, rest);\r
}\r
\r
\r