git.sesse.net Git - casparcg/blob - core/mixer/read_frame.cpp

   1 /*\r
   2 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>\r
   3 *\r
   4 * This file is part of CasparCG (www.casparcg.com).\r
   5 *\r
   6 * CasparCG is free software: you can redistribute it and/or modify\r
   7 * it under the terms of the GNU General Public License as published by\r
   8 * the Free Software Foundation, either version 3 of the License, or\r
   9 * (at your option) any later version.\r
  10 *\r
  11 * CasparCG is distributed in the hope that it will be useful,\r
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
  14 * GNU General Public License for more details.\r
  15 *\r
  16 * You should have received a copy of the GNU General Public License\r
  17 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.\r
  18 *\r
  19 * Author: Robert Nagy, ronag89@gmail.com\r
  20 */\r
  21 \r
  22 #include "../stdafx.h"\r
  23 \r
  24 #include "read_frame.h"\r
  25 \r
  26 #include "gpu/fence.h"\r
  27 #include "gpu/host_buffer.h"    \r
  28 #include "gpu/ogl_device.h"\r
  29 \r
  30 #include <tbb/mutex.h>\r
  31 \r
  32 namespace caspar { namespace core {\r
  33                                                                                                                                                                                                                                                                                                                         \r
  34 struct read_frame::implementation : boost::noncopyable\r
  35 {\r
  36         safe_ptr<ogl_device>            ogl_;\r
  37         size_t                                          size_;\r
  38         safe_ptr<host_buffer>           image_data_;\r
  39         tbb::mutex                                      mutex_;\r
  40         audio_buffer                            audio_data_;\r
  41 \r
  42 public:\r
  43         implementation(const safe_ptr<ogl_device>& ogl, size_t size, safe_ptr<host_buffer>&& image_data, audio_buffer&& audio_data) \r
  44                 : ogl_(ogl)\r
  45                 , size_(size)\r
  46                 , image_data_(std::move(image_data))\r
  47                 , audio_data_(std::move(audio_data)){}  \r
  48         \r
  49         const boost::iterator_range<const uint8_t*> image_data()\r
  50         {\r
  51                 {\r
  52                         tbb::mutex::scoped_lock lock(mutex_);\r
  53 \r
  54                         if(!image_data_->data())\r
  55                         {\r
  56                                 image_data_.get()->wait(*ogl_);\r
  57                                 ogl_->invoke([=]{image_data_.get()->map();}, high_priority);\r
  58                         }\r
  59                 }\r
  60 \r
  61                 auto ptr = static_cast<const uint8_t*>(image_data_->data());\r
  62                 return boost::iterator_range<const uint8_t*>(ptr, ptr + image_data_->size());\r
  63         }\r
  64         const boost::iterator_range<const int32_t*> audio_data()\r
  65         {\r
  66                 return boost::iterator_range<const int32_t*>(audio_data_.data(), audio_data_.data() + audio_data_.size());\r
  67         }\r
  68 };\r
  69 \r
  70 read_frame::read_frame(const safe_ptr<ogl_device>& ogl, size_t size, safe_ptr<host_buffer>&& image_data, audio_buffer&& audio_data) \r
  71         : impl_(new implementation(ogl, size, std::move(image_data), std::move(audio_data))){}\r
  72 read_frame::read_frame(){}\r
  73 const boost::iterator_range<const uint8_t*> read_frame::image_data()\r
  74 {\r
  75         return impl_ ? impl_->image_data() : boost::iterator_range<const uint8_t*>();\r
  76 }\r
  77 \r
  78 const boost::iterator_range<const int32_t*> read_frame::audio_data()\r
  79 {\r
  80         return impl_ ? impl_->audio_data() : boost::iterator_range<const int32_t*>();\r
  81 }\r
  82 \r
  83 size_t read_frame::image_size() const{return impl_ ? impl_->size_ : 0;}\r
  84 \r
  85 //#include <tbb/scalable_allocator.h>\r
  86 //#include <tbb/parallel_for.h>\r
  87 //#include <tbb/enumerable_thread_specific.h>\r
  88 //#define               CACHED_BUFFER_SIZE      4096    \r
  89 //typedef               unsigned int            UINT;\r
  90 //\r
  91 //struct cache_buffer\r
  92 //{\r
  93 //      cache_buffer() : data(scalable_aligned_malloc(CACHED_BUFFER_SIZE, 64)){}\r
  94 //      ~cache_buffer() {scalable_aligned_free(data);}\r
  95 //      void* data;\r
  96 //};\r
  97 //\r
  98 //void  CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch );\r
  99 //\r
 100 //void* copy_frame(void* dest, const safe_ptr<read_frame>& frame)\r
 101 //{\r
 102 //      auto src                = frame->image_data().begin();\r
 103 //      auto height             = 720;\r
 104 //      auto width4             = frame->image_data().size()/height;\r
 105 //\r
 106 //      CASPAR_ASSERT(frame->image_data().size() % height == 0);\r
 107 //                      \r
 108 //      tbb::affinity_partitioner ap;\r
 109 //      tbb::parallel_for(tbb::blocked_range<size_t>(0, height), [&](tbb::blocked_range<size_t>& r)\r
 110 //      {\r
 111 //              CopyFrame(const_cast<uint8_t*>(src)+r.begin()*width4, reinterpret_cast<uint8_t*>(dest)+r.begin()*width4, width4, r.size(), width4);\r
 112 //      }, ap);\r
 113 //\r
 114 //      return dest;\r
 115 //}\r
 116 //\r
 117 ////  CopyFrame( )\r
 118 ////\r
 119 ////  COPIES VIDEO FRAMES FROM USWC MEMORY TO WB SYSTEM MEMORY VIA CACHED BUFFER\r
 120 ////    ASSUMES PITCH IS A MULTIPLE OF 64B CACHE LINE SIZE, WIDTH MAY NOT BE\r
 121 //// http://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers/\r
 122 //void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch )\r
 123 //{\r
 124 //      tbb::enumerable_thread_specific<cache_buffer> cache_buffers;\r
 125 //\r
 126 //      void *          pCacheBlock = cache_buffers.local().data;\r
 127 //\r
 128 //      __m128i         x0, x1, x2, x3;\r
 129 //      __m128i         *pLoad;\r
 130 //      __m128i         *pStore;\r
 131 //      __m128i         *pCache;\r
 132 //      UINT            x, y, yLoad, yStore;\r
 133 //      UINT            rowsPerBlock;\r
 134 //      UINT            width64;\r
 135 //      UINT            extraPitch;     \r
 136 //\r
 137 //      rowsPerBlock = CACHED_BUFFER_SIZE / pitch;\r
 138 //      width64 = (width + 63) & ~0x03f;\r
 139 //      extraPitch = (pitch - width64) / 16;\r
 140 //\r
 141 //      pLoad  = (__m128i *)pSrc;\r
 142 //      pStore = (__m128i *)pDest;\r
 143 //\r
 144 //      //  COPY THROUGH 4KB CACHED BUFFER\r
 145 //      for( y = 0; y < height; y += rowsPerBlock  )\r
 146 //      {\r
 147 //              //  ROWS LEFT TO COPY AT END\r
 148 //              if( y + rowsPerBlock > height )\r
 149 //                      rowsPerBlock = height - y;\r
 150 //\r
 151 //              pCache = (__m128i *)pCacheBlock;\r
 152 //\r
 153 //              _mm_mfence();                           \r
 154 //              \r
 155 //              // LOAD ROWS OF PITCH WIDTH INTO CACHED BLOCK\r
 156 //              for( yLoad = 0; yLoad < rowsPerBlock; yLoad++ )\r
 157 //              {\r
 158 //                      // COPY A ROW, CACHE LINE AT A TIME\r
 159 //                      for( x = 0; x < pitch; x +=64 )\r
 160 //                      {\r
 161 //                              x0 = _mm_stream_load_si128( pLoad +0 );\r
 162 //                              x1 = _mm_stream_load_si128( pLoad +1 );\r
 163 //                              x2 = _mm_stream_load_si128( pLoad +2 );\r
 164 //                              x3 = _mm_stream_load_si128( pLoad +3 );\r
 165 //\r
 166 //                              _mm_store_si128( pCache +0,     x0 );\r
 167 //                              _mm_store_si128( pCache +1, x1 );\r
 168 //                              _mm_store_si128( pCache +2, x2 );\r
 169 //                              _mm_store_si128( pCache +3, x3 );\r
 170 //\r
 171 //                              pCache += 4;\r
 172 //                              pLoad += 4;\r
 173 //                      }\r
 174 //              }\r
 175 //\r
 176 //              _mm_mfence();\r
 177 //\r
 178 //              pCache = (__m128i *)pCacheBlock;\r
 179 //\r
 180 //              // STORE ROWS OF FRAME WIDTH FROM CACHED BLOCK\r
 181 //              for( yStore = 0; yStore < rowsPerBlock; yStore++ )\r
 182 //              {\r
 183 //                      // copy a row, cache line at a time\r
 184 //                      for( x = 0; x < width64; x +=64 )\r
 185 //                      {\r
 186 //                              x0 = _mm_load_si128( pCache );\r
 187 //                              x1 = _mm_load_si128( pCache +1 );\r
 188 //                              x2 = _mm_load_si128( pCache +2 );\r
 189 //                              x3 = _mm_load_si128( pCache +3 );\r
 190 //\r
 191 //                              _mm_stream_si128( pStore,       x0 );\r
 192 //                              _mm_stream_si128( pStore +1, x1 );\r
 193 //                              _mm_stream_si128( pStore +2, x2 );\r
 194 //                              _mm_stream_si128( pStore +3, x3 );\r
 195 //\r
 196 //                              pCache += 4;\r
 197 //                              pStore += 4;\r
 198 //                      }\r
 199 //\r
 200 //                      pCache += extraPitch;\r
 201 //                      pStore += extraPitch;\r
 202 //              }\r
 203 //      }\r
 204 //}\r
 205 \r
 206 }}