X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=core%2Fmixer%2Faudio%2Faudio_mixer.cpp;h=50ce413c4f707393c78215358209cdba00482129;hb=4339e2b3466b78ed27cbe88592eb2e247c5c13a4;hp=a241ad39773fde6378eeb8c7841319dda8e03839;hpb=2f549904ed6da11061fa914fc99f0e0acdd90357;p=casparcg

diff --git a/core/mixer/audio/audio_mixer.cpp b/core/mixer/audio/audio_mixer.cpp
index a241ad397..50ce413c4 100644
--- a/core/mixer/audio/audio_mixer.cpp
+++ b/core/mixer/audio/audio_mixer.cpp
@@ -26,6 +26,8 @@
 
 #include <tbb/parallel_for.h>
 
+#include <safeint.h>
+
 #include <stack>
 #include <deque>
 
@@ -33,21 +35,17 @@ namespace caspar { namespace core {
 
 struct audio_item
 {
-	const void*				tag;
-	frame_transform			transform;
-	std::vector<int16_t>	audio_data;
+	const void*			tag;			// Identifies the source frame; mix() keys its per-source transform history on it.
+	frame_transform		transform;		// Top of transform_stack_ at the time the frame was visited.
+	audio_buffer		audio_data;		// Samples taken over from the visited write_frame.
 };
 	
 struct audio_mixer::implementation
 {
-	std::stack<core::frame_transform> transform_stack_;
-
-	std::map<const void*, core::frame_transform> prev_frame_transforms_;
-	std::map<const void*, core::frame_transform> next_frame_transforms_;
-
-	const core::video_format_desc format_desc_;
-
-	std::vector<audio_item> items;
+	std::stack<core::frame_transform>				transform_stack_;		// Accumulated transforms: begin() pushes top()*frame transform, end() pops.
+	std::map<const void*, core::frame_transform>	prev_frame_transforms_;	// Per-tag transform from the previous mix(); start point of the volume ramp.
+	const core::video_format_desc					format_desc_;			// Supplies field_mode, audio_channels and audio_samples_per_frame.
+	std::vector<audio_item>							items;					// Audio queued by visit(); consumed and cleared by mix().
 
 public:
 	implementation(const core::video_format_desc& format_desc)
@@ -61,7 +59,7 @@ public:
 		transform_stack_.push(transform_stack_.top()*frame.get_frame_transform());
 	}
 
-	void visit(const core::write_frame& frame)
+	void visit(core::write_frame& frame) // Non-const: the frame's audio buffer is handed to std::move further down.
 	{
 		// We only care about the last field.
 		if(format_desc_.field_mode == field_mode::upper && transform_stack_.top().field_mode == field_mode::upper)
@@ -77,7 +75,7 @@ public:
 		audio_item item;
 		item.tag		= frame.tag();
 		item.transform	= transform_stack_.top();
-		item.audio_data = std::vector<int16_t>(frame.audio_data().begin(), frame.audio_data().end());
+		item.audio_data = std::move(frame.audio_data()); // Takes over the frame's buffer if audio_buffer is movable. NOTE(review): push_back(item) still copies the item afterwards.
 
 		items.push_back(item);		
 	}
@@ -92,9 +90,11 @@ public:
 		transform_stack_.pop();
 	}
 	
-	std::vector<int16_t> mix()
-	{
-		auto result = std::vector<int16_t>(format_desc_.audio_samples_per_frame);
+	audio_buffer mix() // Sums every item queued by visit() into one buffer of audio_samples_per_frame samples, then clears the queue.
+	{	
+		auto intermediate = std::vector<float, tbb::cache_aligned_allocator<float>>(format_desc_.audio_samples_per_frame+128, 0.0f); // Float accumulator; the 128-element pad lets the conversion loop run whole 32-element batches.
+
+		std::map<const void*, core::frame_transform> next_frame_transforms; // Transforms of the tags seen in this call; becomes prev_frame_transforms_ at the end.
 
 		BOOST_FOREACH(auto& item, items)
 		{				
@@ -105,42 +105,74 @@ public:
 			if(it != prev_frame_transforms_.end())
 				prev = it->second;
 				
-				next_frame_transforms_[item.tag] = next; // Store all active tags, inactive tags will be removed at the end.
+				next_frame_transforms[item.tag] = next; // Store all active tags; tags not seen in this call are dropped when prev_frame_transforms_ is replaced at the end.
 				
 			if(next.volume < 0.001 && prev.volume < 0.001)
 				continue;
-		
-			static const int BASE = 1<<15;
-
-			const auto next_volume = static_cast<int>(next.volume*BASE);
-			const auto prev_volume = static_cast<int>(prev.volume*BASE);
-		
-			const int n_samples = result.size();
-		
-			const auto in_size = static_cast<size_t>(item.audio_data.size());
-			CASPAR_VERIFY(in_size == 0 || in_size == result.size());
-
-			if(in_size > result.size())
+									
+			if(static_cast<size_t>(item.audio_data.size()) != format_desc_.audio_samples_per_frame) // Items with any other sample count (including empty) are skipped.
 				continue;
 
+			CASPAR_ASSERT(format_desc_.audio_channels == 2);
+			CASPAR_ASSERT(format_desc_.audio_samples_per_frame % 4 == 0); // The loop below consumes 4 samples per iteration.
+						
+			const float prev_volume = static_cast<float>(prev.volume);
+			const float next_volume = static_cast<float>(next.volume);
+			const float delta		= 1.0f/static_cast<float>(format_desc_.audio_samples_per_frame/2); // Ramp step per 2 samples, i.e. per stereo pair given the 2-channel assert.
+			
 			tbb::parallel_for
 			(
-				tbb::blocked_range<size_t>(0, item.audio_data.size()),
+				tbb::blocked_range<size_t>(0, format_desc_.audio_samples_per_frame/4),
 				[&](const tbb::blocked_range<size_t>& r)
 				{
 					for(size_t n = r.begin(); n < r.end(); ++n)
 					{
-						const int sample_volume = (prev_volume - (prev_volume * n)/n_samples) + (next_volume * n)/n_samples;
-						const int sample = (static_cast<int>(item.audio_data[n])*sample_volume)/BASE;
-						result[n] = static_cast<int16_t>((static_cast<int>(result[n]) + sample) & 0xFFFF);
+						const float alpha0	= (n*2) * delta; // Ramp position of the first pair in this group of 4 samples.
+						const float volume0	= prev_volume * (1.0f - alpha0) + next_volume * alpha0;
+						const float volume1	= prev_volume * (1.0f - (alpha0 + delta)) + next_volume * (alpha0 + delta); // Same interpolation one step later; the two weights sum to 1.
+
+						auto sample_epi32	= _mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4])); // NOTE(review): aligned load; assumes audio_buffer storage is 16-byte aligned - confirm.
+						auto res_sample_ps	= _mm_load_ps(&intermediate[n*4]);
+
+						auto sample_ps		= _mm_cvtepi32_ps(sample_epi32);
+						sample_ps			= _mm_mul_ps(sample_ps, _mm_setr_ps(volume0, volume0, volume1, volume1)); // setr: first argument is lane 0, so the earlier pair gets volume0. Assumes interleaved stereo - confirm.
+						res_sample_ps		= _mm_add_ps(sample_ps, res_sample_ps);	
+
+						_mm_store_ps(&intermediate[n*4], res_sample_ps);
 					}
 				}
 			);
 		}
+		
+		auto result = audio_buffer(format_desc_.audio_samples_per_frame+128, 0); // Padded like intermediate; trimmed to the real size before returning.
+
+		auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data());
+		auto result_128		  = reinterpret_cast<__m128i*>(result.data());
+		for(size_t n = 0; n < (format_desc_.audio_samples_per_frame+31)/32; ++n) // Round up so a tail of fewer than 32 samples is converted too; the overrun (at most 31) stays inside the 128-element pad.
+		{
+			auto xmm0 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm1 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm2 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm3 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm4 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm5 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm6 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			auto xmm7 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));
+			
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0)); // Float -> int32 conversion, 8 vectors of 4 per iteration.
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6));
+			_mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7));
+		}
 
 		items.clear();
-		prev_frame_transforms_ = std::move(next_frame_transforms_);	
+		prev_frame_transforms_ = std::move(next_frame_transforms); // Tags not seen in this call are forgotten here.
 
+		result.resize(format_desc_.audio_samples_per_frame); // Drop the padding.
 		return std::move(result);
 	}
 };
@@ -149,7 +181,7 @@ audio_mixer::audio_mixer(const core::video_format_desc& format_desc) : impl_(new
 void audio_mixer::begin(core::basic_frame& frame){impl_->begin(frame);}
 void audio_mixer::visit(core::write_frame& frame){impl_->visit(frame);}
 void audio_mixer::end(){impl_->end();}
-std::vector<int16_t> audio_mixer::mix(){return impl_->mix();}
+audio_buffer audio_mixer::mix(){return impl_->mix();} // Forwards to implementation::mix().
 audio_mixer& audio_mixer::operator=(audio_mixer&& other)
 {
 	impl_ = std::move(other.impl_);