From 77c5e19de35d9d3d9b470113e9f194ebe83b0f0b Mon Sep 17 00:00:00 2001 From: ronag Date: Sat, 22 Oct 2011 20:28:39 +0000 Subject: [PATCH] 2.0.0: Fixed potential audio-mixer bug which could happen when parallel loop was not affinitated with a single thread. git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.0.0.2@1381 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d --- core/mixer/audio/audio_mixer.cpp | 104 +++++++++++++------------------ 1 file changed, 45 insertions(+), 59 deletions(-) diff --git a/core/mixer/audio/audio_mixer.cpp b/core/mixer/audio/audio_mixer.cpp index 4dccf01a7..5a695e61c 100644 --- a/core/mixer/audio/audio_mixer.cpp +++ b/core/mixer/audio/audio_mixer.cpp @@ -110,7 +110,7 @@ public: prev = it->second; next_frame_transforms[item.tag] = next; // Store all active tags, inactive tags will be removed at the end. - + if(next.volume < 0.001 && prev.volume < 0.001) continue; @@ -123,68 +123,54 @@ public: const float prev_volume = static_cast(prev.volume); const float next_volume = static_cast(next.volume); const float delta = 1.0f/static_cast(format_desc_.audio_samples_per_frame/format_desc_.audio_channels); - - tbb::parallel_for - ( - tbb::blocked_range(0, format_desc_.audio_samples_per_frame/4), - [&](const tbb::blocked_range& r) - { - auto alpha_ps = _mm_setr_ps(delta, delta, 0.0f, 0.0f); - auto delta2_ps = _mm_set_ps1(delta*2.0f); - auto prev_ps = _mm_set_ps1(prev_volume); - auto next_ps = _mm_set_ps1(next_volume); - - for(size_t n = r.begin(); n < r.end(); ++n) - { - auto next2_ps = _mm_mul_ps(next_ps, alpha_ps); - auto prev2_ps = _mm_sub_ps(prev_ps, _mm_mul_ps(prev_ps, alpha_ps)); - auto volume_ps = _mm_add_ps(next2_ps, prev2_ps); - - auto sample_ps = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4]))); - auto res_sample_ps = _mm_load_ps(&intermediate[n*4]); - sample_ps = _mm_mul_ps(sample_ps, volume_ps); - res_sample_ps = _mm_add_ps(sample_ps, res_sample_ps); - - alpha_ps = _mm_add_ps(alpha_ps, delta2_ps); - - _mm_store_ps(&intermediate[n*4], res_sample_ps); - } - } - , ap); + + auto alpha_ps = _mm_setr_ps(delta, delta, 0.0f, 0.0f); + auto delta2_ps = _mm_set_ps1(delta*2.0f); + auto prev_ps = _mm_set_ps1(prev_volume); + auto next_ps = _mm_set_ps1(next_volume); + + for(size_t n = 0; n < format_desc_.audio_samples_per_frame/4; ++n) + { + auto next2_ps = _mm_mul_ps(next_ps, alpha_ps); + auto prev2_ps = _mm_sub_ps(prev_ps, _mm_mul_ps(prev_ps, alpha_ps)); + auto volume_ps = _mm_add_ps(next2_ps, prev2_ps); + + auto sample_ps = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4]))); + auto res_sample_ps = _mm_load_ps(&intermediate[n*4]); + sample_ps = _mm_mul_ps(sample_ps, volume_ps); + res_sample_ps = _mm_add_ps(sample_ps, res_sample_ps); + + alpha_ps = _mm_add_ps(alpha_ps, delta2_ps); + + _mm_store_ps(&intermediate[n*4], res_sample_ps); + } } - auto result = audio_buffer(format_desc_.audio_samples_per_frame+128, 0); - - tbb::parallel_for - ( - tbb::blocked_range(0, format_desc_.audio_samples_per_frame/32), - [&](const tbb::blocked_range& r) - { - auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data()+r.begin()*32); - auto result_128 = reinterpret_cast<__m128i*>(result.data()+r.begin()*32); + auto result = audio_buffer(format_desc_.audio_samples_per_frame+128); + + auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data()); + auto result_128 = reinterpret_cast<__m128i*>(result.data()); - for(size_t n = r.begin(); n < r.end(); ++n) - { - auto xmm0 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm1 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm2 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm3 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm4 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm5 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm6 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - auto xmm7 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + for(size_t n = 0; n < format_desc_.audio_samples_per_frame/32; ++n) + { + auto xmm0 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm1 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm2 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm3 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm4 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm5 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm6 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm7 = _mm_load_ps(reinterpret_cast(intermediate_128++)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6)); - _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7)); - } - } - , ap); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7)); + } items.clear(); prev_frame_transforms_ = std::move(next_frame_transforms); -- 2.39.2