X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=core%2Fmixer%2Faudio%2Faudio_mixer.cpp;h=4dccf01a7a5b652e177184b1b3ccbdbc02ee0dea;hb=155bb9004a3f60a946af0f0ce12552dc9a223d4b;hp=4904e1d03a150550d3abe8e564ae48e28587700b;hpb=0aaea04ee10d1189a8352d80d8e667e7673e9b46;p=casparcg diff --git a/core/mixer/audio/audio_mixer.cpp b/core/mixer/audio/audio_mixer.cpp index 4904e1d03..4dccf01a7 100644 --- a/core/mixer/audio/audio_mixer.cpp +++ b/core/mixer/audio/audio_mixer.cpp @@ -22,78 +22,65 @@ #include "audio_mixer.h" #include -#include +#include + +#include + +#include + +#include +#include namespace caspar { namespace core { + +struct audio_item +{ + const void* tag; + frame_transform transform; + audio_buffer audio_data; +}; struct audio_mixer::implementation { - std::deque> audio_data_; - std::stack transform_stack_; - - std::map prev_audio_transforms_; - std::map next_audio_transforms_; + std::stack transform_stack_; + std::map prev_frame_transforms_; + const core::video_format_desc format_desc_; + std::vector items; public: - implementation() + implementation(const core::video_format_desc& format_desc) + : format_desc_(format_desc) { - transform_stack_.push(core::audio_transform()); - - // 2 frames delay - audio_data_.push_back(std::vector()); - audio_data_.push_back(std::vector()); + transform_stack_.push(core::frame_transform()); } - void begin(const core::basic_frame& frame) + void begin(core::basic_frame& frame) { - transform_stack_.push(transform_stack_.top()*frame.get_audio_transform()); + transform_stack_.push(transform_stack_.top()*frame.get_frame_transform()); } - void visit(const core::write_frame& frame) + void visit(core::write_frame& frame) { - if(!transform_stack_.top().get_has_audio()) + // We only care about the last field. + if(format_desc_.field_mode == field_mode::upper && transform_stack_.top().field_mode == field_mode::upper) return; - auto& audio_data = frame.audio_data(); - auto tag = frame.tag(); // Get the identifier for the audio-stream. - - if(audio_data_.back().empty()) - audio_data_.back().resize(audio_data.size(), 0); - - auto next = transform_stack_.top(); - auto prev = next; + if(format_desc_.field_mode == field_mode::lower && transform_stack_.top().field_mode == field_mode::lower) + return; - auto it = prev_audio_transforms_.find(tag); - if(it != prev_audio_transforms_.end()) - prev = it->second; - - next_audio_transforms_[tag] = next; // Store all active tags, inactive tags will be removed in end_pass. - - auto next_gain = next.get_gain(); - auto prev_gain = prev.get_gain(); - - if(next_gain < 0.001 && prev_gain < 0.001) + // Skip empty audio. + if(transform_stack_.top().volume < 0.002 || frame.audio_data().empty()) return; - tbb::parallel_for - ( - tbb::blocked_range(0, audio_data.size()), - [&](const tbb::blocked_range& r) - { - for(size_t n = r.begin(); n < r.end(); ++n) - { - double alpha = static_cast(n)/static_cast(audio_data_.back().size()); - double sample_gain = prev_gain * (1.0 - alpha) + next_gain * alpha; - int sample = static_cast(audio_data[n]); - sample = (static_cast(sample_gain*static_cast(1<<15))*sample)>>15; - audio_data_.back()[n] = static_cast((static_cast(audio_data_.back()[n]) + sample) & 0xFFFF); - } - } - ); - } + audio_item item; + item.tag = frame.tag(); + item.transform = transform_stack_.top(); + item.audio_data = std::move(frame.audio_data()); + items.push_back(item); + } - void begin(const core::audio_transform& transform) + void begin(const core::frame_transform& transform) { transform_stack_.push(transform_stack_.top()*transform); } @@ -102,28 +89,120 @@ public: { transform_stack_.pop(); } + + audio_buffer mix() + { + // NOTE: auto data should be larger than format_desc_.audio_samples_per_frame to allow sse to read/write beyond size. - std::vector begin_pass() - { - auto result = std::move(audio_data_.front()); - audio_data_.pop_front(); + auto intermediate = std::vector>(format_desc_.audio_samples_per_frame+128, 0.0f); + + std::map next_frame_transforms; - audio_data_.push_back(std::vector()); + tbb::affinity_partitioner ap; - return result; - } + BOOST_FOREACH(auto& item, items) + { + const auto next = item.transform; + auto prev = next; - void end_pass() - { - prev_audio_transforms_ = std::move(next_audio_transforms_); + const auto it = prev_frame_transforms_.find(item.tag); + if(it != prev_frame_transforms_.end()) + prev = it->second; + + next_frame_transforms[item.tag] = next; // Store all active tags, inactive tags will be removed at the end. + + if(next.volume < 0.001 && prev.volume < 0.001) + continue; + + if(static_cast(item.audio_data.size()) != format_desc_.audio_samples_per_frame) + continue; + + CASPAR_ASSERT(format_desc_.audio_channels == 2); + CASPAR_ASSERT(format_desc_.audio_samples_per_frame % 4 == 0); + + const float prev_volume = static_cast(prev.volume); + const float next_volume = static_cast(next.volume); + const float delta = 1.0f/static_cast(format_desc_.audio_samples_per_frame/format_desc_.audio_channels); + + tbb::parallel_for + ( + tbb::blocked_range(0, format_desc_.audio_samples_per_frame/4), + [&](const tbb::blocked_range& r) + { + auto alpha_ps = _mm_setr_ps(delta, delta, 0.0f, 0.0f); + auto delta2_ps = _mm_set_ps1(delta*2.0f); + auto prev_ps = _mm_set_ps1(prev_volume); + auto next_ps = _mm_set_ps1(next_volume); + + for(size_t n = r.begin(); n < r.end(); ++n) + { + auto next2_ps = _mm_mul_ps(next_ps, alpha_ps); + auto prev2_ps = _mm_sub_ps(prev_ps, _mm_mul_ps(prev_ps, alpha_ps)); + auto volume_ps = _mm_add_ps(next2_ps, prev2_ps); + + auto sample_ps = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4]))); + auto res_sample_ps = _mm_load_ps(&intermediate[n*4]); + sample_ps = _mm_mul_ps(sample_ps, volume_ps); + res_sample_ps = _mm_add_ps(sample_ps, res_sample_ps); + + alpha_ps = _mm_add_ps(alpha_ps, delta2_ps); + + _mm_store_ps(&intermediate[n*4], res_sample_ps); + } + } + , ap); + } + + auto result = audio_buffer(format_desc_.audio_samples_per_frame+128, 0); + + tbb::parallel_for + ( + tbb::blocked_range(0, format_desc_.audio_samples_per_frame/32), + [&](const tbb::blocked_range& r) + { + auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data()+r.begin()*32); + auto result_128 = reinterpret_cast<__m128i*>(result.data()+r.begin()*32); + + for(size_t n = r.begin(); n < r.end(); ++n) + { + auto xmm0 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm1 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm2 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm3 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm4 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm5 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm6 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + auto xmm7 = _mm_load_ps(reinterpret_cast(intermediate_128++)); + + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6)); + _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7)); + } + } + , ap); + + items.clear(); + prev_frame_transforms_ = std::move(next_frame_transforms); + + result.resize(format_desc_.audio_samples_per_frame); + return std::move(result); } }; -audio_mixer::audio_mixer() : impl_(new implementation()){} -void audio_mixer::begin(const core::basic_frame& frame){impl_->begin(frame);} +audio_mixer::audio_mixer(const core::video_format_desc& format_desc) : impl_(new implementation(format_desc)){} +void audio_mixer::begin(core::basic_frame& frame){impl_->begin(frame);} void audio_mixer::visit(core::write_frame& frame){impl_->visit(frame);} void audio_mixer::end(){impl_->end();} -std::vector audio_mixer::begin_pass(){return impl_->begin_pass();} -void audio_mixer::end_pass(){impl_->end_pass();} +audio_buffer audio_mixer::mix(){return impl_->mix();} +audio_mixer& audio_mixer::operator=(audio_mixer&& other) +{ + impl_ = std::move(other.impl_); + return *this; +} }} \ No newline at end of file