#pragma once\r
\r
+#include <algorithm>\r
#include <vector>\r
\r
#include <stdint.h>\r
#include <tbb/cache_aligned_allocator.h>\r
\r
namespace caspar { namespace core {\r
-\r
-// NOTE: Input data pointer should be larger than input.size() to allow sse to read beyond\r
-template<typename T>\r
-static std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio_32_to_16_sse(const T& audio_data)\r
-{ \r
- auto size = std::distance(std::begin(audio_data), std::end(audio_data));\r
- auto input32 = &(*std::begin(audio_data));\r
- auto output16 = std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>(size);\r
-\r
- auto input128 = reinterpret_cast<const __m128i*>(input32);\r
- auto output128 = reinterpret_cast<__m128i*>(output16.data());\r
-\r
- for(int n = 0; n < size/8; ++n) \r
- {\r
- auto xmm0 = _mm_srai_epi32(_mm_load_si128(input128++), 16);\r
- auto xmm1 = _mm_srai_epi32(_mm_load_si128(input128++), 16);\r
- auto xmm3 = _mm_packs_epi32(xmm0, xmm1);\r
- _mm_store_si128(output128++, xmm3);\r
- }\r
-\r
- for(int n = size/8; n < size; ++n)\r
- output16[n] = input32[n] >> 16;\r
-\r
- return output16;\r
-}\r
-\r
+ \r
template<typename T>\r
static std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>> audio_32_to_24(const T& audio_data)\r
{ \r
}\r
\r
+// Converts 32-bit audio samples to 16-bit samples by keeping the upper 16 bits
+// of each element (element >> 16); the lower 16 bits are discarded.
+//
+// audio_data: any range usable with std::begin/std::end whose elements support
+//             operator>> with an int shift count.
+// Returns a vector with one int16_t per input element, in input order. An empty
+// input yields an empty vector and no element is ever dereferenced.
template<typename T>
-static std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>> audio_32_to_16(const T& audio_data)
+static std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio_32_to_16(const T& audio_data)
{ 
 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
- auto input8 = reinterpret_cast<const int8_t*>(&(*std::begin(audio_data)));
- auto output8 = std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>>();
+	auto output16 = std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>();
 
- output8.reserve(size*2);
+	output16.reserve(size);
- for(int n = 0; n < size; ++n)
- {
- output8.push_back(input8[n*4+2]);
- output8.push_back(input8[n*4+3]);
- }
+	// Walk the range by iterator instead of taking &*begin(): the begin iterator of
+	// an empty range must not be dereferenced, and this form does not require the
+	// elements to be stored contiguously.
+	for(auto it = std::begin(audio_data); it != std::end(audio_data); ++it)
+	{
+		// For 32-bit input the shifted value already fits in int16_t, so no mask is
+		// needed; the explicit cast documents the intended narrowing.
+		output16.push_back(static_cast<int16_t>(*it >> 16));
+	}
 
- return output8;
+	return output16;
}
\r
}}
\ No newline at end of file