/*
* Copyright 2013 Sveriges Television AB http://casparcg.com/
*
* This file is part of CasparCG (www.casparcg.com).
*
* CasparCG is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CasparCG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Robert Nagy, ronag89@gmail.com
*/
\r
22 #include "../stdafx.h"
\r
24 #include "read_frame.h"
\r
26 #include "gpu/fence.h"
\r
27 #include "gpu/host_buffer.h"
\r
28 #include "gpu/ogl_device.h"
\r
30 #include <tbb/mutex.h>
\r
32 #include <boost/chrono.hpp>
\r
34 namespace caspar { namespace core {
\r
36 int64_t get_current_time_millis()
\r
38 using namespace boost::chrono;
\r
40 return duration_cast<milliseconds>(
\r
41 high_resolution_clock::now().time_since_epoch()).count();
\r
44 struct read_frame::implementation : boost::noncopyable
\r
46 safe_ptr<ogl_device> ogl_;
\r
48 safe_ptr<host_buffer> image_data_;
\r
50 audio_buffer audio_data_;
\r
51 channel_layout audio_channel_layout_;
\r
52 int64_t created_timestamp_;
\r
56 const safe_ptr<ogl_device>& ogl,
\r
58 safe_ptr<host_buffer>&& image_data,
\r
59 audio_buffer&& audio_data,
\r
60 const channel_layout& audio_channel_layout)
\r
63 , image_data_(std::move(image_data))
\r
64 , audio_data_(std::move(audio_data))
\r
65 , audio_channel_layout_(audio_channel_layout)
\r
66 , created_timestamp_(get_current_time_millis())
\r
70 const boost::iterator_range<const uint8_t*> image_data()
\r
73 tbb::mutex::scoped_lock lock(mutex_);
\r
75 if(!image_data_->data())
\r
77 image_data_.get()->wait(*ogl_);
\r
78 ogl_->invoke([=]{image_data_.get()->map();}, high_priority);
\r
82 auto ptr = static_cast<const uint8_t*>(image_data_->data());
\r
83 return boost::iterator_range<const uint8_t*>(ptr, ptr + image_data_->size());
\r
85 const boost::iterator_range<const int32_t*> audio_data()
\r
87 return boost::iterator_range<const int32_t*>(audio_data_.data(), audio_data_.data() + audio_data_.size());
\r
91 read_frame::read_frame(
\r
92 const safe_ptr<ogl_device>& ogl,
\r
94 safe_ptr<host_buffer>&& image_data,
\r
95 audio_buffer&& audio_data,
\r
96 const channel_layout& audio_channel_layout)
\r
97 : impl_(new implementation(ogl, size, std::move(image_data), std::move(audio_data), audio_channel_layout))
\r
101 read_frame::read_frame(){}
\r
102 const boost::iterator_range<const uint8_t*> read_frame::image_data()
\r
104 return impl_ ? impl_->image_data() : boost::iterator_range<const uint8_t*>();
\r
107 const boost::iterator_range<const int32_t*> read_frame::audio_data()
\r
109 return impl_ ? impl_->audio_data() : boost::iterator_range<const int32_t*>();
\r
112 size_t read_frame::image_size() const{return impl_ ? impl_->size_ : 0;}
\r
113 int read_frame::num_channels() const { return impl_ ? impl_->audio_channel_layout_.num_channels : 0; }
\r
114 const multichannel_view<const int32_t, boost::iterator_range<const int32_t*>::const_iterator> read_frame::multichannel_view() const
\r
116 return make_multichannel_view<const int32_t>(
\r
117 impl_->audio_data().begin(),
\r
118 impl_->audio_data().end(),
\r
119 impl_->audio_channel_layout_);
\r
122 int64_t read_frame::get_age_millis() const
\r
124 return impl_ ? get_current_time_millis() - impl_->created_timestamp_ : 0;
\r
127 //#include <tbb/scalable_allocator.h>
\r
128 //#include <tbb/parallel_for.h>
\r
129 //#include <tbb/enumerable_thread_specific.h>
\r
130 //#define CACHED_BUFFER_SIZE 4096
\r
131 //typedef unsigned int UINT;
\r
133 //struct cache_buffer
\r
135 // cache_buffer() : data(scalable_aligned_malloc(CACHED_BUFFER_SIZE, 64)){}
\r
136 // ~cache_buffer() {scalable_aligned_free(data);}
\r
140 //void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch );
\r
142 //void* copy_frame(void* dest, const safe_ptr<read_frame>& frame)
\r
144 // auto src = frame->image_data().begin();
\r
145 // auto height = 720;
\r
146 // auto width4 = frame->image_data().size()/height;
\r
148 // CASPAR_ASSERT(frame->image_data().size() % height == 0);
\r
150 // tbb::affinity_partitioner ap;
\r
151 // tbb::parallel_for(tbb::blocked_range<size_t>(0, height), [&](tbb::blocked_range<size_t>& r)
\r
153 // CopyFrame(const_cast<uint8_t*>(src)+r.begin()*width4, reinterpret_cast<uint8_t*>(dest)+r.begin()*width4, width4, r.size(), width4);
\r
161 //// COPIES VIDEO FRAMES FROM USWC MEMORY TO WB SYSTEM MEMORY VIA CACHED BUFFER
\r
162 //// ASSUMES PITCH IS A MULTIPLE OF 64B CACHE LINE SIZE, WIDTH MAY NOT BE
\r
163 //// http://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers/
\r
164 //void CopyFrame( void * pSrc, void * pDest, UINT width, UINT height, UINT pitch )
\r
166 // tbb::enumerable_thread_specific<cache_buffer> cache_buffers;
\r
168 // void * pCacheBlock = cache_buffers.local().data;
\r
170 // __m128i x0, x1, x2, x3;
\r
172 // __m128i *pStore;
\r
173 // __m128i *pCache;
\r
174 // UINT x, y, yLoad, yStore;
\r
175 // UINT rowsPerBlock;
\r
177 // UINT extraPitch;
\r
179 // rowsPerBlock = CACHED_BUFFER_SIZE / pitch;
\r
180 // width64 = (width + 63) & ~0x03f;
\r
181 // extraPitch = (pitch - width64) / 16;
\r
183 // pLoad = (__m128i *)pSrc;
\r
184 // pStore = (__m128i *)pDest;
\r
186 // // COPY THROUGH 4KB CACHED BUFFER
\r
187 // for( y = 0; y < height; y += rowsPerBlock )
\r
189 // // ROWS LEFT TO COPY AT END
\r
190 // if( y + rowsPerBlock > height )
\r
191 // rowsPerBlock = height - y;
\r
193 // pCache = (__m128i *)pCacheBlock;
\r
197 // // LOAD ROWS OF PITCH WIDTH INTO CACHED BLOCK
\r
198 // for( yLoad = 0; yLoad < rowsPerBlock; yLoad++ )
\r
200 // // COPY A ROW, CACHE LINE AT A TIME
\r
201 // for( x = 0; x < pitch; x +=64 )
\r
203 // x0 = _mm_stream_load_si128( pLoad +0 );
\r
204 // x1 = _mm_stream_load_si128( pLoad +1 );
\r
205 // x2 = _mm_stream_load_si128( pLoad +2 );
\r
206 // x3 = _mm_stream_load_si128( pLoad +3 );
\r
208 // _mm_store_si128( pCache +0, x0 );
\r
209 // _mm_store_si128( pCache +1, x1 );
\r
210 // _mm_store_si128( pCache +2, x2 );
\r
211 // _mm_store_si128( pCache +3, x3 );
\r
220 // pCache = (__m128i *)pCacheBlock;
\r
222 // // STORE ROWS OF FRAME WIDTH FROM CACHED BLOCK
\r
223 // for( yStore = 0; yStore < rowsPerBlock; yStore++ )
\r
225 // // copy a row, cache line at a time
\r
226 // for( x = 0; x < width64; x +=64 )
\r
228 // x0 = _mm_load_si128( pCache );
\r
229 // x1 = _mm_load_si128( pCache +1 );
\r
230 // x2 = _mm_load_si128( pCache +2 );
\r
231 // x3 = _mm_load_si128( pCache +3 );
\r
233 // _mm_stream_si128( pStore, x0 );
\r
234 // _mm_stream_si128( pStore +1, x1 );
\r
235 // _mm_stream_si128( pStore +2, x2 );
\r
236 // _mm_stream_si128( pStore +3, x3 );
\r
242 // pCache += extraPitch;
\r
243 // pStore += extraPitch;
\r