]> git.sesse.net Git - casparcg/blob - accelerator/ogl/util/device.cpp
7afefa616c46bf49b5220501944c4a2c854a35eb
[casparcg] / accelerator / ogl / util / device.cpp
1 /*
2 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
3 *
4 * This file is part of CasparCG (www.casparcg.com).
5 *
6 * CasparCG is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * CasparCG is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Author: Robert Nagy, ronag89@gmail.com
20 */
21
22 // TODO: Smart GC
23
24 #include "../../StdAfx.h"
25
26 #include "device.h"
27
28 #include "buffer.h"
29 #include "texture.h"
30 #include "shader.h"
31
32 #include <common/assert.h>
33 #include <common/except.h>
34 #include <common/future.h>
35 #include <common/array.h>
36 #include <common/memory.h>
37 #include <common/gl/gl_check.h>
38 #include <common/timer.h>
39
40 #include <GL/glew.h>
41
42 #include <SFML/Window/Context.hpp>
43
44 #include <tbb/concurrent_unordered_map.h>
45 #include <tbb/concurrent_hash_map.h>
46 #include <tbb/concurrent_queue.h>
47
48 #include <boost/utility/declval.hpp>
49 #include <boost/property_tree/ptree.hpp>
50
51 #include <array>
52 #include <unordered_map>
53
54 #include <asmlib.h>
55 #include <tbb/parallel_for.h>
56
57 namespace caspar { namespace accelerator { namespace ogl {
58                 
59 struct device::impl : public std::enable_shared_from_this<impl>
60 {       
61         static_assert(std::is_same<decltype(boost::declval<device>().impl_), spl::shared_ptr<impl>>::value, "impl_ must be shared_ptr");
62
63         tbb::concurrent_hash_map<buffer*, std::shared_ptr<texture>> texture_cache_;
64
65         std::unique_ptr<sf::Context> device_;
66         
67         std::array<tbb::concurrent_unordered_map<std::size_t, tbb::concurrent_bounded_queue<std::shared_ptr<texture>>>, 8>      device_pools_;
68         std::array<tbb::concurrent_unordered_map<std::size_t, tbb::concurrent_bounded_queue<std::shared_ptr<buffer>>>, 2>       host_pools_;
69         
70         GLuint fbo_;
71
72         executor& executor_;
73                                 
74         impl(executor& executor) 
75                 : executor_(executor)
76         {
77                 executor_.set_capacity(256);
78
79                 CASPAR_LOG(info) << L"Initializing OpenGL Device.";
80                 
81                 executor_.invoke([=]
82                 {
83                         device_.reset(new sf::Context());
84                         device_->setActive(true);               
85                                                 
86                         if (glewInit() != GLEW_OK)
87                                 CASPAR_THROW_EXCEPTION(gl::ogl_exception() << msg_info("Failed to initialize GLEW."));
88                 
89                         if(!GLEW_VERSION_3_0)
90                                 CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Your graphics card does not meet the minimum hardware requirements since it does not support OpenGL 3.0 or higher."));
91         
92                         glGenFramebuffers(1, &fbo_);                            
93                         glBindFramebuffer(GL_FRAMEBUFFER, fbo_);
94                 });
95                                 
96                 CASPAR_LOG(info) << L"Successfully initialized OpenGL " << version();
97         }
98
99         ~impl()
100         {
101                 auto context = executor_.is_current() ? std::string() : get_context();
102
103                 executor_.invoke([=]
104                 {
105                         CASPAR_SCOPED_CONTEXT_MSG(context);
106                         texture_cache_.clear();
107
108                         for (auto& pool : host_pools_)
109                                 pool.clear();
110
111                         for (auto& pool : device_pools_)
112                                 pool.clear();
113
114                         glDeleteFramebuffers(1, &fbo_);
115
116                         device_.reset();
117                 });
118         }
119
120         boost::property_tree::wptree info() const
121         {
122                 boost::property_tree::wptree info;
123
124                 boost::property_tree::wptree pooled_device_buffers;
125                 size_t total_pooled_device_buffer_size  = 0;
126                 size_t total_pooled_device_buffer_count = 0;
127
128                 for (size_t i = 0; i < device_pools_.size(); ++i)
129                 {
130                         auto& pools             = device_pools_.at(i);
131                         bool mipmapping = i > 3;
132                         auto stride             = mipmapping ? i - 3 : i + 1;
133
134                         for (auto& pool : pools)
135                         {
136                                 auto width      = pool.first >> 16;
137                                 auto height     = pool.first & 0x0000FFFF;
138                                 auto size       = width * height * stride;
139                                 auto count      = pool.second.size();
140
141                                 if (count == 0)
142                                         continue;
143
144                                 boost::property_tree::wptree pool_info;
145
146                                 pool_info.add(L"stride",                stride);
147                                 pool_info.add(L"mipmapping",    mipmapping);
148                                 pool_info.add(L"width",                 width);
149                                 pool_info.add(L"height",                height);
150                                 pool_info.add(L"size",                  size);
151                                 pool_info.add(L"count",                 count);
152
153                                 total_pooled_device_buffer_size         += size * count;
154                                 total_pooled_device_buffer_count        += count;
155
156                                 pooled_device_buffers.add_child(L"device_buffer_pool", pool_info);
157                         }
158                 }
159
160                 info.add_child(L"gl.details.pooled_device_buffers", pooled_device_buffers);
161
162                 boost::property_tree::wptree pooled_host_buffers;
163                 size_t total_read_size          = 0;
164                 size_t total_write_size         = 0;
165                 size_t total_read_count         = 0;
166                 size_t total_write_count        = 0;
167
168                 for (size_t i = 0; i < host_pools_.size(); ++i)
169                 {
170                         auto& pools     = host_pools_.at(i);
171                         auto usage      = static_cast<buffer::usage>(i);
172
173                         for (auto& pool : pools)
174                         {
175                                 auto size       = pool.first;
176                                 auto count      = pool.second.size();
177
178                                 if (count == 0)
179                                         continue;
180
181                                 boost::property_tree::wptree pool_info;
182
183                                 pool_info.add(L"usage", usage == buffer::usage::read_only ? L"read_only" : L"write_only");
184                                 pool_info.add(L"size",  size);
185                                 pool_info.add(L"count", count);
186
187                                 pooled_host_buffers.add_child(L"host_buffer_pool", pool_info);
188
189                                 (usage == buffer::usage::read_only ? total_read_count : total_write_count) += count;
190                                 (usage == buffer::usage::read_only ? total_read_size : total_write_size) += size * count;
191                         }
192                 }
193
194                 info.add_child(L"gl.details.pooled_host_buffers",                               pooled_host_buffers);
195                 info.add(L"gl.summary.pooled_device_buffers.total_count",               total_pooled_device_buffer_count);
196                 info.add(L"gl.summary.pooled_device_buffers.total_size",                total_pooled_device_buffer_size);
197                 info.add_child(L"gl.summary.all_device_buffers",                                texture::info());
198                 info.add(L"gl.summary.pooled_host_buffers.total_read_count",    total_read_count);
199                 info.add(L"gl.summary.pooled_host_buffers.total_write_count",   total_write_count);
200                 info.add(L"gl.summary.pooled_host_buffers.total_read_size",             total_read_size);
201                 info.add(L"gl.summary.pooled_host_buffers.total_write_size",    total_write_size);
202                 info.add_child(L"gl.summary.all_host_buffers",                                  buffer::info());
203
204                 return info;
205         }
206                 
207         std::wstring version()
208         {       
209                 try
210                 {
211                         return executor_.invoke([]
212                         {
213                                 return u16(reinterpret_cast<const char*>(GL2(glGetString(GL_VERSION)))) + L" " + u16(reinterpret_cast<const char*>(GL2(glGetString(GL_VENDOR))));
214                         });     
215                 }
216                 catch(...)
217                 {
218                         return L"Not found";;
219                 }
220         }
221                                                         
222         spl::shared_ptr<texture> create_texture(int width, int height, int stride, bool mipmapped, bool clear)
223         {
224                 CASPAR_VERIFY(stride > 0 && stride < 5);
225                 CASPAR_VERIFY(width > 0 && height > 0);
226
227                 if(!executor_.is_current())
228                         CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info("Operation only valid in an OpenGL Context."));
229                                         
230                 auto pool = &device_pools_[stride - 1 + (mipmapped ? 4 : 0)][((width << 16) & 0xFFFF0000) | (height & 0x0000FFFF)];
231                 
232                 std::shared_ptr<texture> tex;
233                 if(!pool->try_pop(tex))         
234                         tex = spl::make_shared<texture>(width, height, stride, mipmapped);
235         
236                 if(clear)
237                         tex->clear();
238
239                 return spl::shared_ptr<texture>(tex.get(), [tex, pool](texture*) mutable
240                 {               
241                         pool->push(tex);        
242                 });
243         }
244                 
245         spl::shared_ptr<buffer> create_buffer(std::size_t size, buffer::usage usage)
246         {
247                 CASPAR_VERIFY(size > 0);
248                 
249                 auto pool = &host_pools_[static_cast<int>(usage)][size];
250                 
251                 std::shared_ptr<buffer> buf;
252                 if(!pool->try_pop(buf)) 
253                 {
254                         caspar::timer timer;
255
256                         auto context = executor_.is_current() ? std::string() : get_context();
257
258                         buf = executor_.invoke([&]
259                         {
260                                 CASPAR_SCOPED_CONTEXT_MSG(context);
261                                 return std::make_shared<buffer>(size, usage);
262                         }, task_priority::high_priority);
263                         
264                         if(timer.elapsed() > 0.02)
265                                 CASPAR_LOG(warning) << L"[ogl-device] Performance warning. Buffer allocation blocked: " << timer.elapsed();
266                 }
267                 
268                 std::weak_ptr<impl> self = shared_from_this(); // buffers can leave the device context, take a hold on life-time.
269                 return spl::shared_ptr<buffer>(buf.get(), [=](buffer*) mutable
270                 {
271                         auto strong = self.lock();
272
273                         if (strong)
274                         {
275                                 auto context = executor_.is_current() ? std::string() : get_context();
276
277                                 strong->executor_.invoke([&]
278                                 {
279                                         CASPAR_SCOPED_CONTEXT_MSG(context);
280                                         strong->texture_cache_.erase(buf.get());
281                                 }, task_priority::high_priority);
282                                 
283                                 pool->push(buf);
284                         }
285                         else
286                         {
287                                 CASPAR_LOG(info) << L"Buffer outlived ogl device";
288                         }
289                 });
290         }
291
292         array<std::uint8_t> create_array(std::size_t size)
293         {               
294                 auto buf = create_buffer(size, buffer::usage::write_only);
295                 return array<std::uint8_t>(buf->data(), buf->size(), false, buf);
296         }
297
298         template<typename T>
299         std::shared_ptr<buffer> copy_to_buf(const T& source)
300         {
301                 std::shared_ptr<buffer> buf;
302
303                 auto tmp = source.template storage<spl::shared_ptr<buffer>>();
304                 if(tmp)
305                         buf = *tmp;
306                 else
307                 {                       
308                         buf = create_buffer(source.size(), buffer::usage::write_only);
309                         tbb::parallel_for(tbb::blocked_range<std::size_t>(0, source.size()), [&](const tbb::blocked_range<std::size_t>& r)
310                         {
311                                 A_memcpy(buf->data() + r.begin(), source.data() + r.begin(), r.size());
312                         });
313                 }
314
315                 return buf;
316         }
317
318         // TODO: Since the returned texture is cached it SHOULD NOT be modified.
319         std::future<std::shared_ptr<texture>> copy_async(const array<const std::uint8_t>& source, int width, int height, int stride, bool mipmapped)
320         {
321                 std::shared_ptr<buffer> buf = copy_to_buf(source);
322                 auto context = executor_.is_current() ? std::string() : get_context();
323
324                 return executor_.begin_invoke([=]() -> std::shared_ptr<texture>
325                 {
326                         CASPAR_SCOPED_CONTEXT_MSG(context);
327                         tbb::concurrent_hash_map<buffer*, std::shared_ptr<texture>>::const_accessor a;
328                         if(texture_cache_.find(a, buf.get()))
329                                 return spl::make_shared_ptr(a->second);
330
331                         auto texture = create_texture(width, height, stride, mipmapped, false);
332                         texture->copy_from(*buf);
333
334                         texture_cache_.insert(std::make_pair(buf.get(), texture));
335                         
336                         return texture;
337                 }, task_priority::high_priority);
338         }
339         
340         std::future<std::shared_ptr<texture>> copy_async(const array<std::uint8_t>& source, int width, int height, int stride, bool mipmapped)
341         {
342                 std::shared_ptr<buffer> buf = copy_to_buf(source);
343                 auto context = executor_.is_current() ? std::string() : get_context();
344
345                 return executor_.begin_invoke([=]() -> std::shared_ptr<texture>
346                 {
347                         CASPAR_SCOPED_CONTEXT_MSG(context);
348                         auto texture = create_texture(width, height, stride, mipmapped, false);
349                         texture->copy_from(*buf);       
350                         
351                         return texture;
352                 }, task_priority::high_priority);
353         }
354
355         std::future<array<const std::uint8_t>> copy_async(const spl::shared_ptr<texture>& source)
356         {
357                 if(!executor_.is_current())
358                         CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info("Operation only valid in an OpenGL Context."));
359
360                 auto buffer = create_buffer(source->size(), buffer::usage::read_only); 
361                 source->copy_to(*buffer);       
362
363                 auto self = shared_from_this();
364                 auto context = get_context();
365                 auto cmd = [self, buffer, context]() mutable -> array<const std::uint8_t>
366                 {
367                         self->executor_.invoke([&buffer, &context] // Defer blocking "map" call until data is needed.
368                         {
369                                 CASPAR_LOG_CALL(trace) << "Readback <- " << context;
370                                 buffer->map();
371                         });
372                         return array<const std::uint8_t>(buffer->data(), buffer->size(), true, buffer);
373                 };
374                 return std::async(std::launch::deferred, std::move(cmd));
375         }
376
377         std::future<void> gc()
378         {
379                 return executor_.begin_invoke([=]
380                 {
381                         CASPAR_LOG(info) << " ogl: Running GC.";
382
383                         try
384                         {
385                                 for (auto& pools : device_pools_)
386                                 {
387                                         for (auto& pool : pools)
388                                                 pool.second.clear();
389                                 }
390                                 for (auto& pools : host_pools_)
391                                 {
392                                         for (auto& pool : pools)
393                                                 pool.second.clear();
394                                 }
395                         }
396                         catch (...)
397                         {
398                                 CASPAR_LOG_CURRENT_EXCEPTION();
399                         }
400                 }, task_priority::high_priority);
401         }
402 };
403
404 device::device() 
405         : executor_(L"OpenGL Rendering Context")
406         , impl_(new impl(executor_)){}
407 device::~device(){}
408 spl::shared_ptr<texture>                                        device::create_texture(int width, int height, int stride, bool mipmapped){ return impl_->create_texture(width, height, stride, mipmapped, true); }
409 array<std::uint8_t>                                                     device::create_array(int size){return impl_->create_array(size);}
410 std::future<std::shared_ptr<texture>>           device::copy_async(const array<const std::uint8_t>& source, int width, int height, int stride, bool mipmapped){return impl_->copy_async(source, width, height, stride, mipmapped);}
411 std::future<std::shared_ptr<texture>>           device::copy_async(const array<std::uint8_t>& source, int width, int height, int stride, bool mipmapped){ return impl_->copy_async(source, width, height, stride, mipmapped); }
412 std::future<array<const std::uint8_t>>          device::copy_async(const spl::shared_ptr<texture>& source){return impl_->copy_async(source);}
413 std::future<void>                                                       device::gc() { return impl_->gc(); }
414 boost::property_tree::wptree                            device::info() const { return impl_->info(); }
415 std::wstring                                                            device::version() const{return impl_->version();}
416
417
418 }}}
419
420