2 * copyright (c) 2010 Sveriges Television AB <info@casparcg.com>
\r
4 * This file is part of CasparCG.
\r
6 * CasparCG is free software: you can redistribute it and/or modify
\r
7 * it under the terms of the GNU General Public License as published by
\r
8 * the Free Software Foundation, either version 3 of the License, or
\r
9 * (at your option) any later version.
\r
11 * CasparCG is distributed in the hope that it will be useful,
\r
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
14 * GNU General Public License for more details.
\r
16 * You should have received a copy of the GNU General Public License
\r
17 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
\r
21 #include "../stdafx.h"
\r
26 #include <functional>
\r
28 #include "../utility/types.h"
\r
30 #include "tbb/parallel_for.h"
\r
31 #include "tbb/blocked_range.h"
\r
33 using namespace std::tr1::placeholders;
\r
// Size in bytes of one parallel work unit: four 128-bit SSE registers.
// clearParallel divides the buffer size by STRIDE to get the number of units,
// and DoclearParallel multiplies unit indices by STRIDE to get byte offsets.
39 static const size_t STRIDE = sizeof(__m128i)*4;
\r
41 void DoclearParallel(const tbb::blocked_range<size_t>& r, const std::tr1::function<void(void*, size_t)>& func, void* dest)
\r
43 size_t offset = r.begin()*STRIDE;
\r
44 size_t size = r.size()*STRIDE;
\r
45 func(reinterpret_cast<s8*>(dest) + offset, size);
\r
48 void clearParallel(const std::tr1::function<void(void*, size_t)>& func, void* dest, size_t size)
\r
50 tbb::parallel_for(tbb::blocked_range<size_t>(0, size/STRIDE), std::bind(&DoclearParallel, std::placeholders::_1, func, dest));
\r
// Returns the clear routine to use for the given SIMD capability level:
// either clearParallel_SSE2 or clearParallel_REF.
// NOTE(review): the condition that chooses between the two returns below is
// not visible in this excerpt; presumably it tests `simd` - confirm against
// the full source before relying on this description.
53 clear_fun get_clear_fun(SIMD simd)
\r
56 return clearParallel_SSE2;
\r
58 return clearParallel_REF;
\r
// TODO: (R.N) optimize => prefetch and cacheline loop unroll
//
// Zeroes `size` bytes starting at `dest` using 128-bit non-temporal stores.
//
// dest - start of the region; _mm_stream_si128 requires it to be 16-byte aligned.
// size - byte count; only whole 16-byte groups are written, so a remainder
//        of size % 16 bytes is left untouched.
void clear_SSE2(void* dest, size_t size)
{
	const __m128i zero = _mm_setzero_si128();
	__m128i* const ptr = reinterpret_cast<__m128i*>(dest);

	// size_t keeps the count unsigned and full-width; an int would truncate
	// large sizes and compare signed against an unsigned-derived value.
	const size_t times = size / sizeof(__m128i);
	for(size_t i = 0; i < times; ++i)
	{
		// Each iteration must target the next 16-byte slot, not the first one.
		_mm_stream_si128(ptr + i, zero);
	}
}
\r
75 void clearParallel_SSE2(void* dest, size_t size)
\r
77 clearParallel(&clear_SSE2, dest, size);
\r
80 void clear_REF(void* dest, size_t size)
\r
82 __stosd(reinterpret_cast<unsigned long*>(dest), 0, size/4);
\r
85 void clearParallel_REF(void* dest, size_t size)
\r
87 clearParallel(&clear_REF, dest, size);
\r