X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=memcpy_interleaved.cpp;h=9634fd26b88ddcb95ca4d5c5afcedb79cb21747c;hb=3795723be95f2fe82f3c8b8b45b1a905b2c811fd;hp=9a41cdd53ac1e5ba93330c9657a2d670171e910c;hpb=cf158af1c2219bd9f5a9bc531fb3c1133d327b45;p=nageru diff --git a/memcpy_interleaved.cpp b/memcpy_interleaved.cpp index 9a41cdd..9634fd2 100644 --- a/memcpy_interleaved.cpp +++ b/memcpy_interleaved.cpp @@ -1,6 +1,6 @@ -#include #include #include +#include #if __SSE2__ #include #endif @@ -56,9 +56,9 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t assert(((limit - src) % 64) == 0); #if __AVX2__ - const __m256i * __restrict in = (const __m256i *)src; - __m256i * __restrict out1 = (__m256i *)dest1; - __m256i * __restrict out2 = (__m256i *)dest2; + const __m256i *__restrict in = (const __m256i *)src; + __m256i *__restrict out1 = (__m256i *)dest1; + __m256i *__restrict out2 = (__m256i *)dest2; __m256i shuffle_cw = _mm256_set_epi8( 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,