/*
 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
 *
 * This file is part of CasparCG (www.casparcg.com).
 *
 * CasparCG is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CasparCG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Robert Nagy, ronag89@gmail.com
 */
22 #include "../../stdafx.h"
\r
24 #include "image_mixer.h"
\r
26 #include "image_kernel.h"
\r
28 #include "../util/write_frame.h"
\r
29 #include "../util/context.h"
\r
30 #include "../util/host_buffer.h"
\r
31 #include "../util/device_buffer.h"
\r
33 #include <common/gl/gl_check.h>
\r
34 #include <common/concurrency/async.h>
\r
35 #include <common/memory/memcpy.h>
\r
37 #include <core/frame/write_frame.h>
\r
38 #include <core/frame/frame_transform.h>
\r
39 #include <core/frame/pixel_format.h>
\r
40 #include <core/video_format.h>
\r
44 #include <gl/glew.h>
\r
46 #include <boost/foreach.hpp>
\r
47 #include <boost/range/algorithm_ext/erase.hpp>
\r
48 #include <boost/thread/future.hpp>
\r
50 #include <algorithm>
\r
53 using namespace boost::assign;
\r
55 namespace caspar { namespace accelerator { namespace ogl {
\r
57 typedef boost::shared_future<spl::shared_ptr<device_buffer>> future_texture;
\r
61 core::pixel_format_desc pix_desc;
\r
62 std::vector<spl::shared_ptr<host_buffer>> buffers;
\r
63 std::vector<future_texture> textures;
\r
64 core::image_transform transform;
\r
68 : pix_desc(core::pixel_format::invalid)
\r
73 bool operator==(const item& lhs, const item& rhs)
\r
75 return lhs.buffers == rhs.buffers && lhs.transform == rhs.transform;
\r
78 bool operator!=(const item& lhs, const item& rhs)
\r
80 return !(lhs == rhs);
\r
85 std::vector<item> items;
\r
86 core::blend_mode blend_mode;
\r
89 : blend_mode(core::blend_mode::normal)
\r
93 layer(std::vector<item> items, core::blend_mode blend_mode)
\r
94 : items(std::move(items))
\r
95 , blend_mode(blend_mode)
\r
100 bool operator==(const layer& lhs, const layer& rhs)
\r
102 return lhs.items == rhs.items && lhs.blend_mode == rhs.blend_mode;
\r
105 bool operator!=(const layer& lhs, const layer& rhs)
\r
107 return !(lhs == rhs);
\r
110 class image_renderer
\r
112 spl::shared_ptr<context> ogl_;
\r
113 image_kernel kernel_;
\r
114 std::pair<std::vector<layer>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;
\r
116 image_renderer(const spl::shared_ptr<context>& ogl)
\r
122 boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
124 if(last_image_.first == layers && last_image_.second.has_value())
\r
125 return last_image_.second;
\r
127 auto image = render(layers, format_desc);
\r
128 last_image_ = std::make_pair(std::move(layers), image);
\r
133 boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
135 static const auto empty = spl::make_shared<const std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(2048*2048*4, 0);
\r
136 CASPAR_VERIFY(empty->size() >= format_desc.size);
\r
139 { // Bypass GPU with empty frame.
\r
140 return async(launch_policy::deferred, [=]
\r
142 return boost::iterator_range<const uint8_t*>(empty->data(), empty->data() + format_desc.size);
\r
145 else if(has_uswc_memcpy() &&
\r
146 layers.size() == 1 &&
\r
147 layers.at(0).items.size() == 1 &&
\r
148 (kernel_.has_blend_modes() && layers.at(0).blend_mode != core::blend_mode::normal) == false &&
\r
149 layers.at(0).items.at(0).pix_desc.format == core::pixel_format::bgra &&
\r
150 layers.at(0).items.at(0).buffers.at(0)->size() == format_desc.size &&
\r
151 layers.at(0).items.at(0).transform == core::image_transform())
\r
152 { // Bypass GPU using streaming loads to cachable memory.
\r
153 auto uswc_buffer = layers.at(0).items.at(0).buffers.at(0);
\r
154 auto buffer = std::make_shared<std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(uswc_buffer->size());
\r
156 uswc_memcpy(buffer->data(), uswc_buffer->data(), uswc_buffer->size());
\r
158 return async(launch_policy::deferred, [=]
\r
160 return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + buffer->size());
\r
165 // Start host->device transfers.
\r
167 std::map<const host_buffer*, future_texture> buffer_map;
\r
169 BOOST_FOREACH(auto& layer, layers)
\r
171 BOOST_FOREACH(auto& item, layer.items)
\r
173 auto host_buffers = boost::get<std::vector<spl::shared_ptr<host_buffer>>>(item.buffers);
\r
174 auto textures = std::vector<future_texture>();
\r
176 for(size_t n = 0; n < host_buffers.size(); ++n)
\r
178 auto buffer = host_buffers[n];
\r
179 auto it = buffer_map.find(buffer.get());
\r
180 if(it == buffer_map.end())
\r
182 auto plane = item.pix_desc.planes[n];
\r
183 auto future_texture = ogl_->copy_async(buffer, plane.width, plane.height, plane.channels);
\r
184 it = buffer_map.insert(std::make_pair(buffer.get(), std::move(future_texture))).first;
\r
186 item.textures.push_back(it->second);
\r
188 item.buffers.clear();
\r
193 boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>
\r
195 auto draw_buffer = create_mixer_buffer(4, format_desc);
\r
197 if(format_desc.field_mode != core::field_mode::progressive)
\r
199 auto upper = layers;
\r
200 auto lower = std::move(layers);
\r
202 BOOST_FOREACH(auto& layer, upper)
\r
204 BOOST_FOREACH(auto& item, layer.items)
\r
205 item.transform.field_mode &= core::field_mode::upper;
\r
208 BOOST_FOREACH(auto& layer, lower)
\r
210 BOOST_FOREACH(auto& item, layer.items)
\r
211 item.transform.field_mode &= core::field_mode::lower;
\r
214 draw(std::move(upper), draw_buffer, format_desc);
\r
215 draw(std::move(lower), draw_buffer, format_desc);
\r
219 draw(std::move(layers), draw_buffer, format_desc);
\r
222 auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only);
\r
223 draw_buffer->copy_to(*result);
\r
227 // Defer memory mapping.
\r
228 return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>
\r
230 const auto& buf = buffer.get();
\r
232 ogl_->invoke(std::bind(&host_buffer::map, std::ref(buf)), task_priority::high_priority);
\r
234 auto ptr = reinterpret_cast<const uint8_t*>(buf->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.
\r
235 return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());
\r
240 void draw(std::vector<layer>&& layers,
\r
241 spl::shared_ptr<device_buffer>& draw_buffer,
\r
242 const core::video_format_desc& format_desc)
\r
244 std::shared_ptr<device_buffer> layer_key_buffer;
\r
246 BOOST_FOREACH(auto& layer, layers)
\r
247 draw_layer(std::move(layer), draw_buffer, layer_key_buffer, format_desc);
\r
250 void draw_layer(layer&& layer,
\r
251 spl::shared_ptr<device_buffer>& draw_buffer,
\r
252 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
253 const core::video_format_desc& format_desc)
\r
255 // Remove empty items.
\r
256 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
258 return item.transform.field_mode == core::field_mode::empty;
\r
261 // Remove first field stills.
\r
262 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
264 return item.transform.is_still && item.transform.field_mode == format_desc.field_mode; // only us last field for stills.
\r
267 if(layer.items.empty())
\r
270 std::shared_ptr<device_buffer> local_key_buffer;
\r
271 std::shared_ptr<device_buffer> local_mix_buffer;
\r
273 if(layer.blend_mode != core::blend_mode::normal)
\r
275 auto layer_draw_buffer = create_mixer_buffer(4, format_desc);
\r
277 BOOST_FOREACH(auto& item, layer.items)
\r
278 draw_item(std::move(item), layer_draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
280 draw_mixer_buffer(layer_draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
281 draw_mixer_buffer(draw_buffer, std::move(layer_draw_buffer), layer.blend_mode);
\r
285 BOOST_FOREACH(auto& item, layer.items)
\r
286 draw_item(std::move(item), draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
288 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
291 layer_key_buffer = std::move(local_key_buffer);
\r
294 void draw_item(item&& item,
\r
295 spl::shared_ptr<device_buffer>& draw_buffer,
\r
296 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
297 std::shared_ptr<device_buffer>& local_key_buffer,
\r
298 std::shared_ptr<device_buffer>& local_mix_buffer,
\r
299 const core::video_format_desc& format_desc)
\r
301 draw_params draw_params;
\r
302 draw_params.pix_desc = std::move(item.pix_desc);
\r
303 draw_params.transform = std::move(item.transform);
\r
304 BOOST_FOREACH(auto& future_texture, item.textures)
\r
305 draw_params.textures.push_back(future_texture.get());
\r
307 if(item.transform.is_key)
\r
309 local_key_buffer = local_key_buffer ? local_key_buffer : create_mixer_buffer(1, format_desc);
\r
311 draw_params.background = local_key_buffer;
\r
312 draw_params.local_key = nullptr;
\r
313 draw_params.layer_key = nullptr;
\r
315 kernel_.draw(std::move(draw_params));
\r
317 else if(item.transform.is_mix)
\r
319 local_mix_buffer = local_mix_buffer ? local_mix_buffer : create_mixer_buffer(4, format_desc);
\r
321 draw_params.background = local_mix_buffer;
\r
322 draw_params.local_key = std::move(local_key_buffer);
\r
323 draw_params.layer_key = layer_key_buffer;
\r
325 draw_params.keyer = keyer::additive;
\r
327 kernel_.draw(std::move(draw_params));
\r
331 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
333 draw_params.background = draw_buffer;
\r
334 draw_params.local_key = std::move(local_key_buffer);
\r
335 draw_params.layer_key = layer_key_buffer;
\r
337 kernel_.draw(std::move(draw_params));
\r
341 void draw_mixer_buffer(spl::shared_ptr<device_buffer>& draw_buffer,
\r
342 std::shared_ptr<device_buffer>&& source_buffer,
\r
343 core::blend_mode blend_mode = core::blend_mode::normal)
\r
348 draw_params draw_params;
\r
349 draw_params.pix_desc.format = core::pixel_format::bgra;
\r
350 draw_params.pix_desc.planes = list_of(core::pixel_format_desc::plane(source_buffer->width(), source_buffer->height(), 4));
\r
351 draw_params.textures = list_of(source_buffer);
\r
352 draw_params.transform = core::image_transform();
\r
353 draw_params.blend_mode = blend_mode;
\r
354 draw_params.background = draw_buffer;
\r
356 kernel_.draw(std::move(draw_params));
\r
359 spl::shared_ptr<device_buffer> create_mixer_buffer(int stride, const core::video_format_desc& format_desc)
\r
361 auto buffer = ogl_->create_device_buffer(format_desc.width, format_desc.height, stride);
\r
367 struct image_mixer::impl : boost::noncopyable
\r
369 spl::shared_ptr<context> ogl_;
\r
370 image_renderer renderer_;
\r
371 std::vector<core::image_transform> transform_stack_;
\r
372 std::vector<layer> layers_; // layer/stream/items
\r
374 impl(const spl::shared_ptr<context>& ogl)
\r
377 , transform_stack_(1)
\r
379 CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer";
\r
382 void begin_layer(core::blend_mode blend_mode)
\r
384 layers_.push_back(layer(std::vector<item>(), blend_mode));
\r
387 void push(const core::frame_transform& transform)
\r
389 transform_stack_.push_back(transform_stack_.back()*transform.image_transform);
\r
392 void visit(const core::data_frame& frame2)
\r
394 auto frame = dynamic_cast<const write_frame*>(&frame2);
\r
395 if(frame == nullptr)
\r
398 if(frame->get_pixel_format_desc().format == core::pixel_format::invalid)
\r
401 if(frame->get_buffers().empty())
\r
404 if(transform_stack_.back().field_mode == core::field_mode::empty)
\r
408 item.pix_desc = frame->get_pixel_format_desc();
\r
409 item.buffers = frame->get_buffers();
\r
410 item.transform = transform_stack_.back();
\r
412 layers_.back().items.push_back(item);
\r
417 transform_stack_.pop_back();
\r
424 boost::shared_future<boost::iterator_range<const uint8_t*>> render(const core::video_format_desc& format_desc)
\r
426 // Remove empty layers.
\r
427 boost::range::remove_erase_if(layers_, [](const layer& layer)
\r
429 return layer.items.empty();
\r
432 return renderer_(std::move(layers_), format_desc);
\r
435 virtual spl::shared_ptr<ogl::write_frame> create_frame(const void* tag, const core::pixel_format_desc& desc)
\r
437 return spl::make_shared<ogl::write_frame>(ogl_, tag, desc);
\r
441 image_mixer::image_mixer(const spl::shared_ptr<context>& ogl) : impl_(new impl(ogl)){}
\r
442 void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);}
\r
443 void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);}
\r
444 void image_mixer::pop(){impl_->pop();}
\r
445 boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}
\r
446 void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}
\r
447 void image_mixer::end_layer(){impl_->end_layer();}
\r
448 spl::shared_ptr<core::write_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc) {return impl_->create_frame(tag, desc);}
\r