X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=core%2Fmixer%2Fread_frame.cpp;h=77ab0c3b666f85d3088d1950be01deae97491eee;hb=6016fd682b267d2886babd51b1faa9cdd1812d54;hp=ba93b77bb094651c694b5fa91aac57562bd3c0b5;hpb=78d792990549440910cc9c9bfb2af288fd9ced53;p=casparcg diff --git a/core/mixer/read_frame.cpp b/core/mixer/read_frame.cpp index ba93b77bb..77ab0c3b6 100644 --- a/core/mixer/read_frame.cpp +++ b/core/mixer/read_frame.cpp @@ -21,35 +21,184 @@ #include "read_frame.h" +#include "gpu/fence.h" #include "gpu/host_buffer.h" +#include "gpu/ogl_device.h" + +#include namespace caspar { namespace core { struct read_frame::implementation : boost::noncopyable { - safe_ptr image_data_; - std::vector audio_data_; + ogl_device& ogl_; + size_t size_; + safe_ptr image_data_; + tbb::mutex mutex_; + std::vector audio_data_; public: - implementation(safe_ptr&& image_data, std::vector&& audio_data) - : image_data_(std::move(image_data)) + implementation(ogl_device& ogl, size_t size, safe_ptr&& image_data, std::vector&& audio_data) + : ogl_(ogl) + , size_(size) + , image_data_(std::move(image_data)) , audio_data_(std::move(audio_data)){} - + const boost::iterator_range image_data() { + { + tbb::mutex::scoped_lock lock(mutex_); + + if(!image_data_->data()) + { + image_data_.get()->wait(ogl_); + ogl_.invoke([=]{image_data_.get()->map();}, high_priority); + } + } + auto ptr = static_cast(image_data_->data()); return boost::iterator_range(ptr, ptr + image_data_->size()); } - const boost::iterator_range audio_data() const + const boost::iterator_range audio_data() { - return boost::iterator_range(audio_data_.data(), audio_data_.data() + audio_data_.size()); + return boost::iterator_range(audio_data_.data(), audio_data_.data() + audio_data_.size()); } }; -read_frame::read_frame(safe_ptr&& image_data, std::vector&& audio_data) - : impl_(new implementation(std::move(image_data), std::move(audio_data))){} +read_frame::read_frame(ogl_device& ogl, size_t size, safe_ptr&& image_data, std::vector&& audio_data) + : impl_(new implementation(ogl, size, std::move(image_data), std::move(audio_data))){} +read_frame::read_frame(){} +const boost::iterator_range read_frame::image_data() +{ + return impl_ ? impl_->image_data() : boost::iterator_range(); +} + +const boost::iterator_range read_frame::audio_data() +{ + return impl_ ? impl_->audio_data() : boost::iterator_range(); +} + +size_t read_frame::image_size() const{return impl_ ? impl_->size_ : 0;} -const boost::iterator_range read_frame::image_data() const{return impl_->image_data();} -const boost::iterator_range read_frame::audio_data() const{return impl_->audio_data();} +//#include +//#include +//#include +//#define CACHED_BUFFER_SIZE 4096 +//typedef unsigned int UINT; +// +//struct cache_buffer +//{ +// cache_buffer() : data(scalable_aligned_malloc(CACHED_BUFFER_SIZE, 64)){} +// ~cache_buffer() {scalable_aligned_free(data);} +// void* data; +//}; +// +//void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch ); +// +//void* copy_frame(void* dest, const safe_ptr& frame) +//{ +// auto src = frame->image_data().begin(); +// auto height = 720; +// auto width4 = frame->image_data().size()/height; +// +// CASPAR_ASSERT(frame->image_data().size() % height == 0); +// +// tbb::affinity_partitioner ap; +// tbb::parallel_for(tbb::blocked_range(0, height), [&](tbb::blocked_range& r) +// { +// CopyFrame(const_cast(src)+r.begin()*width4, reinterpret_cast(dest)+r.begin()*width4, width4, r.size(), width4); +// }, ap); +// +// return dest; +//} +// +//// CopyFrame( ) +//// +//// COPIES VIDEO FRAMES FROM USWC MEMORY TO WB SYSTEM MEMORY VIA CACHED BUFFER +//// ASSUMES PITCH IS A MULTIPLE OF 64B CACHE LINE SIZE, WIDTH MAY NOT BE +//// http://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers/ +//void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch ) +//{ +// tbb::enumerable_thread_specific cache_buffers; +// +// void * pCacheBlock = cache_buffers.local().data; +// +// __m128i x0, x1, x2, x3; +// __m128i *pLoad; +// __m128i *pStore; +// __m128i *pCache; +// UINT x, y, yLoad, yStore; +// UINT rowsPerBlock; +// UINT width64; +// UINT extraPitch; +// +// rowsPerBlock = CACHED_BUFFER_SIZE / pitch; +// width64 = (width + 63) & ~0x03f; +// extraPitch = (pitch - width64) / 16; +// +// pLoad = (__m128i *)pSrc; +// pStore = (__m128i *)pDest; +// +// // COPY THROUGH 4KB CACHED BUFFER +// for( y = 0; y < height; y += rowsPerBlock ) +// { +// // ROWS LEFT TO COPY AT END +// if( y + rowsPerBlock > height ) +// rowsPerBlock = height - y; +// +// pCache = (__m128i *)pCacheBlock; +// +// _mm_mfence(); +// +// // LOAD ROWS OF PITCH WIDTH INTO CACHED BLOCK +// for( yLoad = 0; yLoad < rowsPerBlock; yLoad++ ) +// { +// // COPY A ROW, CACHE LINE AT A TIME +// for( x = 0; x < pitch; x +=64 ) +// { +// x0 = _mm_stream_load_si128( pLoad +0 ); +// x1 = _mm_stream_load_si128( pLoad +1 ); +// x2 = _mm_stream_load_si128( pLoad +2 ); +// x3 = _mm_stream_load_si128( pLoad +3 ); +// +// _mm_store_si128( pCache +0, x0 ); +// _mm_store_si128( pCache +1, x1 ); +// _mm_store_si128( pCache +2, x2 ); +// _mm_store_si128( pCache +3, x3 ); +// +// pCache += 4; +// pLoad += 4; +// } +// } +// +// _mm_mfence(); +// +// pCache = (__m128i *)pCacheBlock; +// +// // STORE ROWS OF FRAME WIDTH FROM CACHED BLOCK +// for( yStore = 0; yStore < rowsPerBlock; yStore++ ) +// { +// // copy a row, cache line at a time +// for( x = 0; x < width64; x +=64 ) +// { +// x0 = _mm_load_si128( pCache ); +// x1 = _mm_load_si128( pCache +1 ); +// x2 = _mm_load_si128( pCache +2 ); +// x3 = _mm_load_si128( pCache +3 ); +// +// _mm_stream_si128( pStore, x0 ); +// _mm_stream_si128( pStore +1, x1 ); +// _mm_stream_si128( pStore +2, x2 ); +// _mm_stream_si128( pStore +3, x3 ); +// +// pCache += 4; +// pStore += 4; +// } +// +// pCache += extraPitch; +// pStore += extraPitch; +// } +// } +//} }} \ No newline at end of file