git.sesse.net Git - casparcg/blob - common/image/copy.cpp

   1 /*\r
   2 * copyright (c) 2010 Sveriges Television AB <info@casparcg.com>\r
   3 *\r
   4 *  This file is part of CasparCG.\r
   5 *\r
   6 *    CasparCG is free software: you can redistribute it and/or modify\r
   7 *    it under the terms of the GNU General Public License as published by\r
   8 *    the Free Software Foundation, either version 3 of the License, or\r
   9 *    (at your option) any later version.\r
  10 *\r
  11 *    CasparCG is distributed in the hope that it will be useful,\r
  12 *    but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  13 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
  14 *    GNU General Public License for more details.\r
  15 \r
  16 *    You should have received a copy of the GNU General Public License\r
  17 *    along with CasparCG.  If not, see <http://www.gnu.org/licenses/>.\r
  18 *\r
  19 */\r
  20  \r
#include "../stdafx.h"

#include "copy.h"

#include <intrin.h>
#include <cstring>
#include <functional>

#include "../utility/types.h"

#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
  32 \r
  33 using namespace std::tr1::placeholders;\r
  34 \r
  35 namespace caspar{\r
  36 namespace common{\r
  37 namespace image{\r
  38 \r
  39 static const size_t STRIDE = sizeof(__m128i)*4;\r
  40 \r
  41 void DocopyParallel(const tbb::blocked_range<size_t>& r, const std::tr1::function<void(void*, const void*, size_t)>& func, void* dest, const void* source)\r
  42 {\r
  43         size_t offset = r.begin()*STRIDE;\r
  44         size_t size = r.size()*STRIDE;\r
  45         func(reinterpret_cast<s8*>(dest) + offset, reinterpret_cast<const s8*>(source) + offset, size);\r
  46 }\r
  47 \r
  48 void copyParallel(const std::tr1::function<void(void*, const void*, size_t)>& func, void* dest, const void* source, size_t size)\r
  49 {\r
  50         tbb::parallel_for(tbb::blocked_range<size_t>(0, size/STRIDE), std::bind(&DocopyParallel, std::placeholders::_1, func, dest, source));   \r
  51 }\r
  52 \r
  53 copy_fun get_copy_fun(SIMD simd)\r
  54 {\r
  55         if(simd >= SSE2)\r
  56                 return copyParallel_SSE2;\r
  57         else\r
  58                 return copyParallel_REF;\r
  59 }\r
  60 \r
  61 // TODO: (R.N) optimize => prefetch and cacheline loop unroll\r
  62 void copy_SSE2(void* dest, const void* source, size_t size)\r
  63 {\r
  64         __m128i val = _mm_setzero_si128();\r
  65         __m128i* pD = reinterpret_cast<__m128i*>(dest);\r
  66         const __m128i* pS = reinterpret_cast<const __m128i*>(source);\r
  67 \r
  68         int times = size / 16;\r
  69         for(int i=0; i < times; ++i) \r
  70         {\r
  71                 val = _mm_load_si128(pS);\r
  72                 _mm_stream_si128(pD, val);\r
  73 \r
  74                 ++pD;\r
  75                 ++pS;\r
  76         }\r
  77         _mm_mfence();   //ensure last WC buffers get flushed to memory\r
  78 }\r
  79 \r
  80 void copyParallel_SSE2(void* dest, const void* source, size_t size)\r
  81 {\r
  82         copyParallel(&copy_SSE2, dest, source, size);\r
  83 }\r
  84 \r
  85 void copy_REF(void* dest, const void* source, size_t size)\r
  86 {\r
  87         __movsd(reinterpret_cast<unsigned long*>(dest), reinterpret_cast<const unsigned long*>(source), size/4);\r
  88 }\r
  89 \r
  90 void copyParallel_REF(void* dest, const void* source, size_t size)\r
  91 {\r
  92         copyParallel(&copy_REF, dest, source, size);\r
  93 }\r
  94 \r
  95 }\r
  96 }\r
  97 }