-\r
- auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data());\r
- auto result_128 = reinterpret_cast<__m128i*>(result.data());\r
- for(size_t n = 0; n < format_desc_.audio_samples_per_frame/32; ++n)\r
- {\r
- auto xmm0 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm1 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm2 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm3 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm4 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm5 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm6 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
- auto xmm7 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ \r
+ tbb::parallel_for\r
+ (\r
+ tbb::blocked_range<size_t>(0, format_desc_.audio_samples_per_frame/32),\r
+ [&](const tbb::blocked_range<size_t>& r)\r
+ { \r
+ auto intermediate_128 = reinterpret_cast<__m128i*>(intermediate.data()+r.begin()*32);\r
+ auto result_128 = reinterpret_cast<__m128i*>(result.data()+r.begin()*32);\r
+ \r
+ for(size_t n = r.begin(); n < r.end(); ++n)\r
+ { \r
+ auto xmm0 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm1 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm2 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm3 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm4 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm5 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm6 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r
+ auto xmm7 = _mm_load_ps(reinterpret_cast<float*>(intermediate_128++));\r