- std::vector<int16_t> audio16(input.size());\r
- auto audio32_ptr = reinterpret_cast<const uint32_t*>(input.begin());\r
- auto audio16_ptr = reinterpret_cast<uint32_t*>(audio16.data());\r
- auto size = input.size()/2;\r
- for(int n = 0; n < size; ++n) \r
- audio16_ptr[n] = (audio32_ptr[n*2+1] & 0xffff0000) | (audio32_ptr[n*2+0] >> 16); \r
+ std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio16(input.size());\r
+ auto audio32_ptr = reinterpret_cast<const __m128i*>(input.begin());\r
+ auto audio16_ptr = reinterpret_cast<__m128i*>(audio16.data());\r
+ auto size = input.size();\r
+ for(int n = 0; n < size/8; ++n) \r
+ {\r
+ auto xmm0 = _mm_srai_epi32(_mm_load_si128(audio32_ptr++), 16);\r
+ auto xmm1 = _mm_srai_epi32(_mm_load_si128(audio32_ptr++), 16);\r
+ auto xmm3 = _mm_packs_epi32(xmm0, xmm1);\r
+ _mm_store_si128(audio16_ptr++, xmm3);\r
+ }\r