#pragma once\r
\r
#include "../utility/assert.h"\r
+#include "../memory/safe_ptr.h"\r
\r
#include <assert.h>\r
\r
return dest;\r
}\r
\r
-}\r
-\r
static void* fast_memcpy_small(void* dest, const void* source, size_t count)\r
{ \r
- if((reinterpret_cast<int>(source) & 15) || (reinterpret_cast<int>(dest) & 15))\r
- return memcpy(reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(source), count);\r
-\r
size_t rest = count & 127;\r
count &= ~127;\r
\r
return memcpy(reinterpret_cast<char*>(dest)+count, reinterpret_cast<const char*>(source)+count, rest);\r
}\r
\r
+}\r
+\r
+\r
static void* fast_memcpy(void* dest, const void* source, size_t count)\r
{ \r
if((reinterpret_cast<int>(source) & 15) || (reinterpret_cast<int>(dest) & 15))\r
internal::fast_memcpy(reinterpret_cast<char*>(dest) + n*512, reinterpret_cast<const char*>(source) + n*512, 512); \r
});\r
\r
- return fast_memcpy_small(reinterpret_cast<char*>(dest)+count, reinterpret_cast<const char*>(source)+count, rest);\r
+ return internal::fast_memcpy_small(reinterpret_cast<char*>(dest)+count, reinterpret_cast<const char*>(source)+count, rest);\r
}\r
\r
-\r
-static void* fast_memcpy_w_align_hack(void* dest, const void* source, size_t count)\r
+// Duplicates `count` bytes starting at `source` into a freshly allocated\r
+// buffer and returns a pointer into that buffer.\r
+//\r
+// The buffer is count + 16 bytes, requested with 32-byte alignment. The copy\r
+// begins at the 16-byte boundary at or below `source` (source - source_align)\r
+// and the returned pointer is buffer + source_align, so it has the same\r
+// offset within a 16-byte block as `source`. Up to 15 bytes located before\r
+// `source` are read as part of that copy.\r
+//\r
+// The returned safe_ptr does not own the address it points at: its deleter is\r
+// a no-op that captures `dest` by value, which keeps the real allocation\r
+// alive for as long as a copy of the returned pointer exists.\r
+template<typename T>\r
+static safe_ptr<T> fast_memdup(const T* source, size_t count)\r
{ \r
- auto dest8 = reinterpret_cast<char*>(dest);\r
- auto source8 = reinterpret_cast<const char*>(source);\r
- \r
+ // Memory obtained from scalable_aligned_malloc is released with scalable_aligned_free.\r
+ auto dest = safe_ptr<T>(reinterpret_cast<T*>(scalable_aligned_malloc(count + 16, 32)), scalable_aligned_free);\r
+ auto dest8 = reinterpret_cast<char*>(dest.get());\r
+ auto source8 = reinterpret_cast<const char*>(source); \r
+ // Only the low four address bits are used below.\r
+ // NOTE(review): the pointer-to-int cast assumes pointers fit in an int - confirm for 64-bit builds.\r
auto source_align = reinterpret_cast<int>(source) & 15;\r
\r
- source8 -= source_align; \r
- \r
- size_t rest = count & 511;\r
- count &= ~511;\r
-\r
- Concurrency::parallel_for<int>(0, count / 512, [&](size_t n)\r
- { \r
- internal::fast_memcpy(dest8 + n*512, source8 + n*512, 512); \r
- });\r
-\r
- memcpy(dest8+count, source8+count, rest);\r
+ // The copy starts source_align bytes before `source`, so it must cover\r
+ // count + source_align bytes; copying only `count` would leave the last\r
+ // source_align bytes of the input out of the duplicate. The buffer has\r
+ // room for this because source_align is at most 15 and 16 extra bytes\r
+ // were allocated.\r
+ fast_memcpy(dest8, source8-source_align, count+source_align);\r
\r
- return dest8+source_align;\r
+ // Hand out the offset pointer; the captured `dest` owns the allocation.\r
+ return safe_ptr<T>(reinterpret_cast<T*>(dest8+source_align), [dest](T*){});\r
}\r
\r
\r
\r
auto count = desc.planes.size();\r
std::array<uint8_t*, 4> org_ptrs;\r
- std::array<uint8_t*, 4> real_ptrs; // We need to store the "real" pointers, due to alignment hack.\r
+ std::array<safe_ptr<uint8_t>, 4> new_ptrs;\r
parallel_for<size_t>(0, count, [&](size_t n)\r
{\r
auto size = frame->linesize[n]*desc.planes[n].height;\r
+ new_ptrs[n] = fast_memdup(frame->data[n], size);\r
org_ptrs[n] = frame->data[n];\r
- real_ptrs[n] = reinterpret_cast<uint8_t*>(scalable_aligned_malloc(size+16, 32)); // Allocate 16 byte extra for alignment hack.\r
- frame->data[n] = reinterpret_cast<uint8_t*>(fast_memcpy_w_align_hack(real_ptrs[n], org_ptrs[n], size));\r
+ frame->data[n] = new_ptrs[n].get();\r
});\r
\r
- return safe_ptr<AVFrame>(frame.get(), [frame, org_ptrs, real_ptrs, count](AVFrame*)\r
+ return safe_ptr<AVFrame>(frame.get(), [frame, org_ptrs, new_ptrs, count](AVFrame*)\r
{\r
for(size_t n = 0; n < count; ++n)\r
- {\r
- scalable_aligned_free(real_ptrs[n]);\r
frame->data[n] = org_ptrs[n];\r
- }\r
});\r
}\r
\r