return dest;\r
}\r
\r
-void* copy(void* dest, const void* source, size_t num)\r
+void* aligned_memcpy(void* dest, const void* source, size_t num)\r
{ \r
+ if(num < 128)\r
+ return memcpy(dest, source, num);\r
+\r
tbb::parallel_for(tbb::blocked_range<size_t>(0, num/128), [&](const tbb::blocked_range<size_t>& r)\r
{\r
memcpy_SSE2(reinterpret_cast<char*>(dest) + r.begin()*128, reinterpret_cast<const char*>(source) + r.begin()*128, r.size()*128);\r