7 #include <tbb/cache_aligned_allocator.h>
\r
9 namespace caspar { namespace core {
\r
11 // NOTE: Input data pointer should be larger than input.size() to allow sse to read beyond
\r
12 template<typename T>
\r
13 static std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio_32_to_16_sse(const T& audio_data)
\r
15 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
\r
16 auto input32 = &(*std::begin(audio_data));
\r
17 auto output16 = std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>(size);
\r
19 auto input128 = reinterpret_cast<const __m128i*>(input32);
\r
20 auto output128 = reinterpret_cast<__m128i*>(output16.data());
\r
22 for(int n = 0; n < size/8; ++n)
\r
24 auto xmm0 = _mm_srai_epi32(_mm_load_si128(input128++), 16);
\r
25 auto xmm1 = _mm_srai_epi32(_mm_load_si128(input128++), 16);
\r
26 auto xmm3 = _mm_packs_epi32(xmm0, xmm1);
\r
27 _mm_store_si128(output128++, xmm3);
\r
30 for(int n = size/8; n < size; ++n)
\r
31 output16[n] = input32[n] >> 16;
\r
36 template<typename T>
\r
37 static std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>> audio_32_to_24(const T& audio_data)
\r
39 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
\r
40 auto input8 = reinterpret_cast<const int8_t*>(&(*std::begin(audio_data)));
\r
41 auto output8 = std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>>();
\r
43 output8.reserve(size*3);
\r
44 for(int n = 0; n < size; ++n)
\r
46 output8.push_back(input8[n*4+1]);
\r
47 output8.push_back(input8[n*4+2]);
\r
48 output8.push_back(input8[n*4+3]);
\r
54 template<typename T>
\r
55 static std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>> audio_32_to_16(const T& audio_data)
\r
57 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
\r
58 auto input8 = reinterpret_cast<const int8_t*>(&(*std::begin(audio_data)));
\r
59 auto output8 = std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>>();
\r
61 output8.reserve(size*2);
\r
62 for(int n = 0; n < size; ++n)
\r
64 output8.push_back(input8[n*4+2]);
\r
65 output8.push_back(input8[n*4+3]);
\r