git.sesse.net Git - casparcg/commitdiff
2.0.2: audio_mixer: Simplified.
author ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Thu, 17 Nov 2011 19:06:51 +0000 (19:06 +0000)
committer ronag <ronag@362d55ac-95cf-4e76-9f9a-cbaa9c17b72d>
Thu, 17 Nov 2011 19:06:51 +0000 (19:06 +0000)
git-svn-id: https://casparcg.svn.sourceforge.net/svnroot/casparcg/server/branches/2.0.2@1577 362d55ac-95cf-4e76-9f9a-cbaa9c17b72d

core/mixer/audio/audio_mixer.cpp

index 2f4706d1f6eb823029d9bb7a0c7a6a04d349852b..983c6e7238f1d02e52ed319d8d2e5c420a619a05 100644 (file)
@@ -92,11 +92,12 @@ public:
        {       \r
                // NOTE: auto data should be larger than format_desc_.audio_samples_per_frame to allow sse to read/write beyond size.\r
 \r
-               auto intermediate = std::vector<float, tbb::cache_aligned_allocator<float>>(format_desc_.audio_samples_per_frame+128, 0.0f);\r
-               auto result               = audio_buffer(format_desc_.audio_samples_per_frame+128);     \r
+               auto intermediate       = std::vector<float, tbb::cache_aligned_allocator<float>>(format_desc_.audio_samples_per_frame+128, 0.0f);\r
+               auto result                     = audio_buffer(format_desc_.audio_samples_per_frame+128);       \r
+               auto result_128         = reinterpret_cast<__m128i*>(result.data());\r
 \r
                std::map<const void*, core::frame_transform> next_frame_transforms;\r
-               \r
+                                               \r
                BOOST_FOREACH(auto& item, items_)\r
                {                       \r
                        const auto next = item.transform;\r
@@ -124,42 +125,33 @@ public:
                        auto alpha_ps   = _mm_set_ps1(alpha*2.0f);\r
                        auto volume_ps  = _mm_setr_ps(prev_volume, prev_volume, prev_volume+alpha, prev_volume+alpha);\r
 \r
-                       for(size_t n = 0; n < format_desc_.audio_samples_per_frame/4; ++n)\r
-                       {               \r
-                               auto sample_ps          = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4])));\r
-                               auto res_sample_ps      = _mm_load_ps(&intermediate[n*4]);                                                                                      \r
-                               sample_ps                       = _mm_mul_ps(sample_ps, volume_ps);     \r
-                               res_sample_ps           = _mm_add_ps(sample_ps, res_sample_ps); \r
+                       if(&item != &items_.back())\r
+                       {\r
+                               for(size_t n = 0; n < format_desc_.audio_samples_per_frame/4; ++n)\r
+                               {               \r
+                                       auto sample_ps          = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4])));\r
+                                       auto res_sample_ps      = _mm_load_ps(&intermediate[n*4]);                                                                                      \r
+                                       sample_ps                       = _mm_mul_ps(sample_ps, volume_ps);     \r
+                                       res_sample_ps           = _mm_add_ps(sample_ps, res_sample_ps); \r
 \r
-                               volume_ps                       = _mm_add_ps(volume_ps, alpha_ps);\r
+                                       volume_ps                       = _mm_add_ps(volume_ps, alpha_ps);\r
 \r
-                               _mm_store_ps(&intermediate[n*4], res_sample_ps);\r
+                                       _mm_store_ps(&intermediate[n*4], res_sample_ps);\r
+                               }\r
                        }\r
-               }               \r
-                       \r
-               auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data());\r
-               auto result_128           = reinterpret_cast<__m128i*>(result.data());\r
-                               \r
-               for(size_t n = 0; n < format_desc_.audio_samples_per_frame/32; ++n)\r
-               {       \r
-                       auto xmm0 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm1 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm2 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm3 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm4 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm5 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm6 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       auto xmm7 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
-                       \r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm0));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm1));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm2));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm3));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm4));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm5));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm6));\r
-                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(xmm7));\r
-               }\r
+                       else\r
+                       {\r
+                               for(size_t n = 0; n < format_desc_.audio_samples_per_frame/4; ++n)\r
+                               {               \r
+                                       auto sample_ps          = _mm_cvtepi32_ps(_mm_load_si128(reinterpret_cast<__m128i*>(&item.audio_data[n*4])));\r
+                                       auto res_sample_ps      = _mm_load_ps(&intermediate[n*4]);                                                                                      \r
+                                       sample_ps                       = _mm_mul_ps(sample_ps, volume_ps);     \r
+                                       res_sample_ps           = _mm_add_ps(sample_ps, res_sample_ps); \r
+                                       \r
+                                       _mm_stream_si128(result_128++, _mm_cvtps_epi32(res_sample_ps));\r
+                               }\r
+                       }\r
+               }                               \r
 \r
                items_.clear();\r
                prev_frame_transforms_ = std::move(next_frame_transforms);      \r