7 #include <tbb/cache_aligned_allocator.h>
\r
9 namespace caspar { namespace core {
\r
11 // NOTE: Input data pointer should be larger than input.size() to allow sse to read beyond
\r
12 template<typename T>
\r
13 static std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio_32_to_16_sse(const T& audio_data)
\r
15 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
\r
16 auto input32 = &(*std::begin(audio_data));
\r
17 auto output16 = std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>(size);
\r
19 auto input128 = reinterpret_cast<const __m128i*>(input32);
\r
20 auto output128 = reinterpret_cast<__m128i*>(output16.data());
\r
22 for(int n = 0; n < size/8; ++n)
\r
24 auto xmm0 = _mm_srai_epi32(_mm_load_si128(input128++), 16);
\r
25 auto xmm1 = _mm_srai_epi32(_mm_load_si128(input128++), 16);
\r
26 auto xmm3 = _mm_packs_epi32(xmm0, xmm1);
\r
27 _mm_store_si128(output128++, xmm3);
\r
30 for(int n = size/8; n < size; ++n)
\r
31 output16[n] = input32[n] >> 16;
\r
36 template<typename T>
\r
37 static std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>> audio_32_to_24(const T& audio_data)
\r
39 auto size = std::distance(std::begin(audio_data), std::end(audio_data));
\r
40 auto input8 = reinterpret_cast<const int8_t*>(&(*std::begin(audio_data)));
\r
41 auto output8 = std::vector<int8_t, tbb::cache_aligned_allocator<int8_t>>(size*3);
\r
43 for(int n = 0; n < size; ++n)
\r
45 output8[n*3+0] = input8[n*4+1];
\r
46 output8[n*3+1] = input8[n*4+2];
\r
47 output8[n*3+2] = input8[n*4+3];
\r