\r
#include "read_frame.h"\r
\r
+#include "gpu/fence.h"\r
#include "gpu/host_buffer.h" \r
+#include "gpu/ogl_device.h"\r
+\r
+#include <tbb/mutex.h>\r
\r
namespace caspar { namespace core {\r
\r
struct read_frame::implementation : boost::noncopyable\r
{\r
- safe_ptr<host_buffer> image_data_;\r
- std::vector<int16_t> audio_data_;\r
+ ogl_device& ogl_; /* device passed to host_buffer::wait() and used to invoke map() in image_data() */\r
+ size_t size_; /* value supplied at construction; reported by read_frame::image_size() */\r
+ safe_ptr<host_buffer> image_data_; /* host buffer whose data()/size() back the range returned by image_data() */\r
+ tbb::mutex mutex_; /* guards the check-wait-map sequence in image_data() */\r
+ audio_buffer audio_data_; /* audio data, exposed by audio_data() as a range of int32_t */\r
\r
public:\r
- implementation(safe_ptr<host_buffer>&& image_data, std::vector<int16_t>&& audio_data) \r
- : image_data_(std::move(image_data))\r
+ implementation(ogl_device& ogl, size_t size, safe_ptr<host_buffer>&& image_data, audio_buffer&& audio_data) \r
+ : ogl_(ogl) /* kept as a reference, so the ogl_device must outlive this object */\r
+ , size_(size)\r
+ , image_data_(std::move(image_data)) /* both buffers are moved in, not copied */\r
, audio_data_(std::move(audio_data)){} \r
-\r
+ /* image_data(): returns [data(), data() + size()) of the host buffer; if data() is still null it first waits on and maps the buffer. */\r
const boost::iterator_range<const uint8_t*> image_data()\r
{\r
+ {\r
+ tbb::mutex::scoped_lock lock(mutex_); /* held until the end of this inner scope */\r
+\r
+ if(!image_data_->data()) /* a null data() triggers the wait + map below */\r
+ {\r
+ image_data_.get()->wait(ogl_); /* presumably blocks until the buffer is ready for mapping - confirm in host_buffer */\r
+ ogl_.invoke([=]{image_data_.get()->map();}, high_priority); /* map() is run through the ogl_device at high_priority */\r
+ }\r
+ }\r
+ /* NOTE(review): data() is read below, outside the lock; this relies on invoke() having completed map() before returning - confirm invoke() is blocking. */\r
auto ptr = static_cast<const uint8_t*>(image_data_->data());\r
return boost::iterator_range<const uint8_t*>(ptr, ptr + image_data_->size());\r
}\r
- const boost::iterator_range<const int16_t*> audio_data() const\r
+ const boost::iterator_range<const int32_t*> audio_data() /* range over the whole of audio_data_ */\r
{\r
- return boost::iterator_range<const int16_t*>(audio_data_.data(), audio_data_.data() + audio_data_.size());\r
+ return boost::iterator_range<const int32_t*>(audio_data_.data(), audio_data_.data() + audio_data_.size());\r
}\r
};\r
\r
-read_frame::read_frame(safe_ptr<host_buffer>&& image_data, std::vector<int16_t>&& audio_data) \r
- : impl_(new implementation(std::move(image_data), std::move(audio_data))){}\r
+read_frame::read_frame(ogl_device& ogl, size_t size, safe_ptr<host_buffer>&& image_data, audio_buffer&& audio_data) \r
+ : impl_(new implementation(ogl, size, std::move(image_data), std::move(audio_data))){} /* forwards all arguments to a newly allocated implementation */\r
+read_frame::read_frame(){} /* does not create an implementation; the accessors test impl_ and return an empty range or 0 in that case */\r
+const boost::iterator_range<const uint8_t*> read_frame::image_data() /* image bytes of the frame */\r
+{\r
+ return impl_ ? impl_->image_data() : boost::iterator_range<const uint8_t*>(); /* default-constructed range when impl_ is unset */\r
+}\r
+\r
+const boost::iterator_range<const int32_t*> read_frame::audio_data() /* audio data of the frame as int32_t values */\r
+{\r
+ return impl_ ? impl_->audio_data() : boost::iterator_range<const int32_t*>(); /* default-constructed range when impl_ is unset */\r
+}\r
+\r
+size_t read_frame::image_size() const{return impl_ ? impl_->size_ : 0;} /* size given at construction, or 0 when impl_ is unset */\r
\r
-const boost::iterator_range<const uint8_t*> read_frame::image_data() const{return impl_->image_data();}\r
-const boost::iterator_range<const int16_t*> read_frame::audio_data() const{return impl_->audio_data();}\r
+//#include <tbb/scalable_allocator.h>\r
+//#include <tbb/parallel_for.h>\r
+//#include <tbb/enumerable_thread_specific.h>\r
+//#define CACHED_BUFFER_SIZE 4096 \r
+//typedef unsigned int UINT;\r
+//\r
+//struct cache_buffer\r
+//{\r
+// cache_buffer() : data(scalable_aligned_malloc(CACHED_BUFFER_SIZE, 64)){}\r
+// ~cache_buffer() {scalable_aligned_free(data);}\r
+// void* data;\r
+//};\r
+//\r
+//void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch );\r
+//\r
+//void* copy_frame(void* dest, const safe_ptr<read_frame>& frame)\r
+//{\r
+// auto src = frame->image_data().begin();\r
+// auto height = 720;\r
+// auto width4 = frame->image_data().size()/height;\r
+//\r
+// CASPAR_ASSERT(frame->image_data().size() % height == 0);\r
+// \r
+// tbb::affinity_partitioner ap;\r
+// tbb::parallel_for(tbb::blocked_range<size_t>(0, height), [&](tbb::blocked_range<size_t>& r)\r
+// {\r
+// CopyFrame(const_cast<uint8_t*>(src)+r.begin()*width4, reinterpret_cast<uint8_t*>(dest)+r.begin()*width4, width4, r.size(), width4);\r
+// }, ap);\r
+//\r
+// return dest;\r
+//}\r
+//\r
+//// CopyFrame( )\r
+////\r
+//// COPIES VIDEO FRAMES FROM USWC MEMORY TO WB SYSTEM MEMORY VIA CACHED BUFFER\r
+//// ASSUMES PITCH IS A MULTIPLE OF 64B CACHE LINE SIZE, WIDTH MAY NOT BE\r
+//// http://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers/\r
+//void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch )\r
+//{\r
+// tbb::enumerable_thread_specific<cache_buffer> cache_buffers;\r
+//\r
+// void * pCacheBlock = cache_buffers.local().data;\r
+//\r
+// __m128i x0, x1, x2, x3;\r
+// __m128i *pLoad;\r
+// __m128i *pStore;\r
+// __m128i *pCache;\r
+// UINT x, y, yLoad, yStore;\r
+// UINT rowsPerBlock;\r
+// UINT width64;\r
+// UINT extraPitch; \r
+//\r
+// rowsPerBlock = CACHED_BUFFER_SIZE / pitch;\r
+// width64 = (width + 63) & ~0x03f;\r
+// extraPitch = (pitch - width64) / 16;\r
+//\r
+// pLoad = (__m128i *)pSrc;\r
+// pStore = (__m128i *)pDest;\r
+//\r
+// // COPY THROUGH 4KB CACHED BUFFER\r
+// for( y = 0; y < height; y += rowsPerBlock )\r
+// {\r
+// // ROWS LEFT TO COPY AT END\r
+// if( y + rowsPerBlock > height )\r
+// rowsPerBlock = height - y;\r
+//\r
+// pCache = (__m128i *)pCacheBlock;\r
+//\r
+// _mm_mfence(); \r
+// \r
+// // LOAD ROWS OF PITCH WIDTH INTO CACHED BLOCK\r
+// for( yLoad = 0; yLoad < rowsPerBlock; yLoad++ )\r
+// {\r
+// // COPY A ROW, CACHE LINE AT A TIME\r
+// for( x = 0; x < pitch; x +=64 )\r
+// {\r
+// x0 = _mm_stream_load_si128( pLoad +0 );\r
+// x1 = _mm_stream_load_si128( pLoad +1 );\r
+// x2 = _mm_stream_load_si128( pLoad +2 );\r
+// x3 = _mm_stream_load_si128( pLoad +3 );\r
+//\r
+// _mm_store_si128( pCache +0, x0 );\r
+// _mm_store_si128( pCache +1, x1 );\r
+// _mm_store_si128( pCache +2, x2 );\r
+// _mm_store_si128( pCache +3, x3 );\r
+//\r
+// pCache += 4;\r
+// pLoad += 4;\r
+// }\r
+// }\r
+//\r
+// _mm_mfence();\r
+//\r
+// pCache = (__m128i *)pCacheBlock;\r
+//\r
+// // STORE ROWS OF FRAME WIDTH FROM CACHED BLOCK\r
+// for( yStore = 0; yStore < rowsPerBlock; yStore++ )\r
+// {\r
+// // copy a row, cache line at a time\r
+// for( x = 0; x < width64; x +=64 )\r
+// {\r
+// x0 = _mm_load_si128( pCache );\r
+// x1 = _mm_load_si128( pCache +1 );\r
+// x2 = _mm_load_si128( pCache +2 );\r
+// x3 = _mm_load_si128( pCache +3 );\r
+//\r
+// _mm_stream_si128( pStore, x0 );\r
+// _mm_stream_si128( pStore +1, x1 );\r
+// _mm_stream_si128( pStore +2, x2 );\r
+// _mm_stream_si128( pStore +3, x3 );\r
+//\r
+// pCache += 4;\r
+// pStore += 4;\r
+// }\r
+//\r
+// pCache += extraPitch;\r
+// pStore += extraPitch;\r
+// }\r
+// }\r
+//}\r
\r
}}
\ No newline at end of file