\r
struct audio_item /* One queued audio contribution; visit() fills these fields and pushes the item onto items. */\r
{\r
- const void* tag;\r
- frame_transform transform;\r
- std::vector<int32_t> audio_data;\r
+ const void* tag; /* Set from frame.tag() in visit(). NOTE(review): presumably the key used for the per-tag transform maps in mix() - confirm. */\r
+ frame_transform transform; /* Copy of transform_stack_.top() taken at the time of the visit() call. */\r
+ audio_buffer audio_data; /* Assigned from std::move(frame.audio_data()) in visit(); mix() reads it with _mm_load_si128 and converts it as 32-bit integers. */\r
};\r
\r
struct audio_mixer::implementation\r
transform_stack_.push(transform_stack_.top()*frame.get_frame_transform());\r
}\r
\r
- void visit(const core::write_frame& frame)\r
+ void visit(core::write_frame& frame)\r
{\r
// We only care about the last field.\r
if(format_desc_.field_mode == field_mode::upper && transform_stack_.top().field_mode == field_mode::upper)\r
audio_item item;\r
item.tag = frame.tag();\r
item.transform = transform_stack_.top();\r
- item.audio_data = std::vector<int32_t>(frame.audio_data().begin(), frame.audio_data().end());\r
+ item.audio_data = std::move(frame.audio_data());\r
\r
items.push_back(item); \r
}\r
}\r
\r
audio_buffer mix()\r
- {\r
- auto result = audio_buffer(format_desc_.audio_samples_per_frame, 0);\r
+ { \r
+ auto intermediate = std::vector<float, tbb::cache_aligned_allocator<float>>(format_desc_.audio_samples_per_frame+128, 0.0f);\r
\r
std::map<const void*, core::frame_transform> next_frame_transforms;\r
\r
continue;\r
\r
CASPAR_ASSERT(format_desc_.audio_channels == 2);\r
+ CASPAR_ASSERT(format_desc_.audio_samples_per_frame % 4 == 0);\r
\r
const float prev_volume = static_cast<float>(prev.volume);\r
const float next_volume = static_cast<float>(next.volume);\r
\r
tbb::parallel_for\r
(\r
- tbb::blocked_range<size_t>(0, format_desc_.audio_samples_per_frame/2),\r
+ tbb::blocked_range<size_t>(0, format_desc_.audio_samples_per_frame/4),\r
[&](const tbb::blocked_range<size_t>& r)\r
{\r
for(size_t n = r.begin(); n < r.end(); ++n)\r
{\r
- const float alpha = n * delta;\r
- const float volume = prev_volume * (1.0f - alpha) + next_volume * alpha;\r
+ const float alpha0 = (n*2) * delta;\r
+ const float volume0 = prev_volume * (1.0f - alpha0) + next_volume * alpha0;\r
+ const float volume1 = prev_volume * (1.0f - alpha0 + delta) + next_volume * (alpha0 + delta);\r
\r
- auto sample_epi32 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&item.audio_data[n*2]));\r
+ auto sample_epi32 = _mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4]));\r
\r
- auto sample_ps = _mm_cvtepi32_ps(sample_epi32); \r
- sample_ps = _mm_mul_ps(sample_ps, _mm_set1_ps(volume)); \r
+ auto sample_ps = _mm_cvtepi32_ps(sample_epi32); \r
+ sample_ps = _mm_mul_ps(sample_ps, _mm_setr_ps(volume1, volume1, volume0, volume0)); \r
\r
- auto res_sample_epi32 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&result[n*2]));\r
- auto res_sample_ps = _mm_cvtepi32_ps(res_sample_epi32); \r
+ auto res_sample_ps = _mm_load_ps(&intermediate[n*4]);\r
+ res_sample_ps = _mm_add_ps(sample_ps, res_sample_ps); \r
\r
- res_sample_ps = _mm_add_ps(sample_ps, res_sample_ps);\r
- res_sample_epi32 = _mm_cvtps_epi32(res_sample_ps);\r
- \r
- _mm_storel_epi64(reinterpret_cast<__m128i*>(&result[n*2]), res_sample_epi32);\r
+ _mm_store_ps(&intermediate[n*4], res_sample_ps);\r
}\r
}\r
);\r
}\r
+ \r
+ auto result = audio_buffer(format_desc_.audio_samples_per_frame+128, 0); \r
+\r
+ auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data());\r
+ auto result_128 = reinterpret_cast<__m128i*>(result.data());\r
+ for(size_t n = 0; n < format_desc_.audio_samples_per_frame/32; ++n)\r
+ {\r
+ auto xmm0 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm1 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm2 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm3 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm4 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm5 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm6 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm7 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ \r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6));\r
+ _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7));\r
+ }\r
\r
items.clear();\r
prev_frame_transforms_ = std::move(next_frame_transforms); \r