From 2f20f863e6b334d300d69cc41ff93170199b2491 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Tue, 19 Feb 2019 09:39:05 +0100
Subject: [PATCH] More tweaks to the memcpy_interleaved multiversioning.

---
 shared/memcpy_interleaved.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/shared/memcpy_interleaved.cpp b/shared/memcpy_interleaved.cpp
index 4336942..4aba3c9 100644
--- a/shared/memcpy_interleaved.cpp
+++ b/shared/memcpy_interleaved.cpp
@@ -1,4 +1,4 @@
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) && !defined(__clang__)
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
 #define HAS_MULTIVERSIONING 1
 #endif
 
@@ -26,12 +26,22 @@ void memcpy_interleaved_slow(uint8_t *dest1, uint8_t *dest2, const uint8_t *src,
 
 #if HAS_MULTIVERSIONING
 
+__attribute__((target("default")))
+size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, const uint8_t *limit);
+
 __attribute__((target("sse2")))
 size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, const uint8_t *limit);
 
 __attribute__((target("avx2")))
 size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, const uint8_t *limit);
 
+__attribute__((target("default")))
+size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, const uint8_t *limit)
+{
+	// No fast path possible unless we have SSE2 or higher.
+	return 0;
+}
+
 __attribute__((target("sse2")))
 size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, const uint8_t *limit)
 {
@@ -100,7 +110,6 @@ size_t memcpy_interleaved_fastpath_core(uint8_t *dest1, uint8_t *dest2, const ui
 }
 
 // Returns the number of bytes consumed.
-__attribute__((target("sse2", "avx2")))
 size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
 {
 	const uint8_t *limit = src + n;
-- 
2.39.2