\r
#include <boost/range/iterator_range.hpp>\r
\r
+#include <tbb/cache_aligned_allocator.h>\r
+\r
namespace caspar { namespace core {\r
\r
-static std::vector<int16_t> audio_32_to_16(const boost::iterator_range<int32_t*>& input)\r
+// Narrows 32-bit audio samples to 16 bits by keeping the upper 16 bits of each\r
+// sample (arithmetic shift right by 16). Returns one int16_t per input sample\r
+// in a cache-aligned vector.\r
+//\r
+// Full groups of 8 samples are converted with SSE2; the remaining size%8\r
+// samples are converted by a scalar loop so they are not left zero-initialized.\r
+//\r
+// NOTE(review): _mm_load_si128 requires a 16-byte aligned address; this assumes\r
+// input.begin() is 16-byte aligned - confirm against the producer of the audio data.\r
+static std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio_32_to_16_sse(const boost::iterator_range<int32_t*>& input)\r
{ \r
- std::vector<int16_t> audio16(input.size());\r
- auto audio32_ptr = reinterpret_cast<const uint32_t*>(input.begin());\r
- auto audio16_ptr = reinterpret_cast<uint32_t*>(audio16.data());\r
- auto size = input.size()/2;\r
- for(int n = 0; n < size; ++n) \r
- audio16_ptr[n] = (audio32_ptr[n*2+1] & 0xffff0000) | (audio32_ptr[n*2+0] >> 16); \r
+ std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>> audio16(input.size());\r
+ auto audio32_ptr = reinterpret_cast<const __m128i*>(input.begin());\r
+ auto audio16_ptr = reinterpret_cast<__m128i*>(audio16.data());\r
+ const size_t size = audio16.size();\r
+ const size_t simd_groups = size/8;\r
+ for(size_t n = 0; n < simd_groups; ++n) \r
+ {\r
+ // After the sign-preserving shift every value fits in 16 bits, so the\r
+ // saturating pack below never clamps.\r
+ auto xmm0 = _mm_srai_epi32(_mm_load_si128(audio32_ptr++), 16);\r
+ auto xmm1 = _mm_srai_epi32(_mm_load_si128(audio32_ptr++), 16);\r
+ auto xmm3 = _mm_packs_epi32(xmm0, xmm1);\r
+ _mm_store_si128(audio16_ptr++, xmm3);\r
+ }\r
+ // Scalar tail: samples that do not fill a whole group of 8.\r
+ for(size_t n = simd_groups*8; n < size; ++n)\r
+ audio16[n] = static_cast<int16_t>(input.begin()[n] >> 16);\r
return audio16;\r
}\r
\r
safe_ptr<diagnostics::graph> graph_;\r
boost::timer perf_timer_;\r
\r
// input_: bounded queue of shared 16-bit sample buffers; filled with try_push/push\r
// and drained by OnGetData() via pop.\r
// container_: circular buffer that receives each buffer popped from input_ in OnGetData().\r
- tbb::concurrent_bounded_queue<std::shared_ptr<std::vector<short>>> input_;\r
- boost::circular_buffer<std::vector<short>> container_;\r
+ tbb::concurrent_bounded_queue<std::shared_ptr<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>>> input_;\r
+ boost::circular_buffer<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>> container_;\r
// Cleared (set to false) by the destructor.\r
tbb::atomic<bool> is_running_;\r
\r
core::video_format_desc format_desc_;\r
// Destructor: clears is_running_, pushes two empty buffers into input_ without\r
// blocking, calls Stop(), and then logs the shutdown message.\r
~oal_consumer()\r
{\r
is_running_ = false;\r
// try_push does not block if the queue is full. NOTE(review): presumably the\r
// empty buffers wake a consumer blocked in input_.pop() so Stop() can return - confirm.\r
- input_.try_push(std::make_shared<std::vector<short>>());\r
- input_.try_push(std::make_shared<std::vector<short>>());\r
+ input_.try_push(std::make_shared<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>>());\r
+ input_.try_push(std::make_shared<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>>());\r
Stop();\r
CASPAR_LOG(info) << print() << L" Shutting down."; \r
}\r
{ \r
if(preroll_count_ < input_.capacity())\r
{\r
- while(input_.try_push(std::make_shared<std::vector<int16_t>>(format_desc_.audio_samples_per_frame, 0)))\r
+ while(input_.try_push(std::make_shared<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>>(format_desc_.audio_samples_per_frame, 0)))\r
++preroll_count_;\r
Play(); \r
}\r
\r
- input_.push(std::make_shared<std::vector<int16_t>>(core::audio_32_to_16(frame->audio_data())));\r
+ input_.push(std::make_shared<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>>(core::audio_32_to_16_sse(frame->audio_data())));\r
\r
return true;\r
}\r
\r
virtual bool OnGetData(sf::SoundStream::Chunk& data)\r
{ \r
- std::shared_ptr<std::vector<short>> audio_data; \r
+ std::shared_ptr<std::vector<int16_t, tbb::cache_aligned_allocator<int16_t>>> audio_data; \r
input_.pop(audio_data);\r
\r
container_.push_back(std::move(*audio_data));\r