-#include <cstdint>
#include <algorithm>
#include <assert.h>
+#include <cstdint>
#if __SSE2__
#include <immintrin.h>
#endif
assert(((limit - src) % 64) == 0);
#if __AVX2__
- const __m256i * __restrict in = (const __m256i *)src;
- __m256i * __restrict out1 = (__m256i *)dest1;
- __m256i * __restrict out2 = (__m256i *)dest2;
+ const __m256i *__restrict in = (const __m256i *)src;
+ __m256i *__restrict out1 = (__m256i *)dest1;
+ __m256i *__restrict out2 = (__m256i *)dest2;
__m256i shuffle_cw = _mm256_set_epi8(
15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,