/*
* Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
*
* This file is part of CasparCG (www.casparcg.com).
*
* CasparCG is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CasparCG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Robert Nagy, ronag89@gmail.com
*/
\r
22 #include "../../stdafx.h"
\r
24 #include "image_mixer.h"
\r
26 #include "image_kernel.h"
\r
28 #include "../util/data_frame.h"
\r
29 #include "../util/device.h"
\r
30 #include "../util/host_buffer.h"
\r
31 #include "../util/device_buffer.h"
\r
33 #include <common/gl/gl_check.h>
\r
34 #include <common/concurrency/async.h>
\r
35 #include <common/memory/memcpy.h>
\r
37 #include <core/frame/data_frame.h>
\r
38 #include <core/frame/frame_transform.h>
\r
39 #include <core/frame/pixel_format.h>
\r
40 #include <core/video_format.h>
\r
44 #include <gl/glew.h>
\r
46 #include <boost/foreach.hpp>
\r
47 #include <boost/range/algorithm_ext/erase.hpp>
\r
48 #include <boost/thread/future.hpp>
\r
50 #include <algorithm>
\r
53 using namespace boost::assign;
\r
55 namespace caspar { namespace accelerator { namespace ogl {
\r
57 typedef boost::shared_future<spl::shared_ptr<device_buffer>> future_texture;
\r
61 core::pixel_format_desc pix_desc;
\r
62 core::field_mode field_mode;
\r
63 std::vector<spl::shared_ptr<host_buffer>> buffers;
\r
64 std::vector<future_texture> textures;
\r
65 core::image_transform transform;
\r
68 : pix_desc(core::pixel_format::invalid)
\r
69 , field_mode(core::field_mode::empty)
\r
74 bool operator==(const item& lhs, const item& rhs)
\r
76 return lhs.buffers == rhs.buffers && lhs.transform == rhs.transform;
\r
79 bool operator!=(const item& lhs, const item& rhs)
\r
81 return !(lhs == rhs);
\r
86 std::vector<item> items;
\r
87 core::blend_mode blend_mode;
\r
90 : blend_mode(core::blend_mode::normal)
\r
94 layer(std::vector<item> items, core::blend_mode blend_mode)
\r
95 : items(std::move(items))
\r
96 , blend_mode(blend_mode)
\r
101 bool operator==(const layer& lhs, const layer& rhs)
\r
103 return lhs.items == rhs.items && lhs.blend_mode == rhs.blend_mode;
\r
106 bool operator!=(const layer& lhs, const layer& rhs)
\r
108 return !(lhs == rhs);
\r
111 class image_renderer
\r
113 spl::shared_ptr<device> ogl_;
\r
114 image_kernel kernel_;
\r
115 std::pair<std::vector<layer>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;
\r
117 image_renderer(const spl::shared_ptr<device>& ogl)
\r
123 boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
125 if(last_image_.first == layers && last_image_.second.has_value())
\r
126 return last_image_.second;
\r
128 auto image = render(layers, format_desc);
\r
129 last_image_ = std::make_pair(std::move(layers), image);
\r
134 boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
137 { // Bypass GPU with empty frame.
\r
138 auto buffer = spl::make_shared<const std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(format_desc.size, 0);
\r
139 return async(launch_policy::deferred, [=]
\r
141 return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + format_desc.size);
\r
144 else if(has_uswc_memcpy() &&
\r
145 layers.size() == 1 &&
\r
146 layers.at(0).items.size() == 1 &&
\r
147 (kernel_.has_blend_modes() && layers.at(0).blend_mode != core::blend_mode::normal) == false &&
\r
148 layers.at(0).items.at(0).pix_desc.format == core::pixel_format::bgra &&
\r
149 layers.at(0).items.at(0).buffers.at(0)->size() == format_desc.size &&
\r
150 layers.at(0).items.at(0).transform == core::image_transform())
\r
151 { // Bypass GPU using streaming loads to cachable memory.
\r
152 auto uswc_buffer = layers.at(0).items.at(0).buffers.at(0);
\r
153 auto buffer = std::make_shared<std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(uswc_buffer->size());
\r
155 uswc_memcpy(buffer->data(), uswc_buffer->data(), uswc_buffer->size());
\r
157 return async(launch_policy::deferred, [=]
\r
159 return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + buffer->size());
\r
164 // Start host->device transfers.
\r
166 std::map<const host_buffer*, future_texture> buffer_map;
\r
168 BOOST_FOREACH(auto& layer, layers)
\r
170 BOOST_FOREACH(auto& item, layer.items)
\r
172 auto host_buffers = boost::get<std::vector<spl::shared_ptr<host_buffer>>>(item.buffers);
\r
173 auto textures = std::vector<future_texture>();
\r
175 for(size_t n = 0; n < host_buffers.size(); ++n)
\r
177 auto buffer = host_buffers[n];
\r
178 auto it = buffer_map.find(buffer.get());
\r
179 if(it == buffer_map.end())
\r
181 auto plane = item.pix_desc.planes[n];
\r
182 auto future_texture = ogl_->copy_async(buffer, plane.width, plane.height, plane.channels);
\r
183 it = buffer_map.insert(std::make_pair(buffer.get(), std::move(future_texture))).first;
\r
185 item.textures.push_back(it->second);
\r
187 item.buffers.clear();
\r
192 boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>
\r
194 auto draw_buffer = create_mixer_buffer(4, format_desc);
\r
196 if(format_desc.field_mode != core::field_mode::progressive)
\r
198 auto upper = layers;
\r
199 auto lower = std::move(layers);
\r
201 BOOST_FOREACH(auto& layer, upper)
\r
203 BOOST_FOREACH(auto& item, layer.items)
\r
204 item.transform.field_mode &= core::field_mode::upper;
\r
207 BOOST_FOREACH(auto& layer, lower)
\r
209 BOOST_FOREACH(auto& item, layer.items)
\r
210 item.transform.field_mode &= core::field_mode::lower;
\r
213 draw(std::move(upper), draw_buffer, format_desc);
\r
214 draw(std::move(lower), draw_buffer, format_desc);
\r
218 draw(std::move(layers), draw_buffer, format_desc);
\r
221 auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only);
\r
222 draw_buffer->copy_to(*result);
\r
226 // Defer memory mapping.
\r
227 return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>
\r
229 const auto& buf = buffer.get();
\r
231 ogl_->invoke(std::bind(&host_buffer::map, std::ref(buf)), task_priority::high_priority);
\r
233 auto ptr = reinterpret_cast<const uint8_t*>(buf->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.
\r
234 return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());
\r
239 void draw(std::vector<layer>&& layers,
\r
240 spl::shared_ptr<device_buffer>& draw_buffer,
\r
241 const core::video_format_desc& format_desc)
\r
243 std::shared_ptr<device_buffer> layer_key_buffer;
\r
245 BOOST_FOREACH(auto& layer, layers)
\r
246 draw_layer(std::move(layer), draw_buffer, layer_key_buffer, format_desc);
\r
249 void draw_layer(layer&& layer,
\r
250 spl::shared_ptr<device_buffer>& draw_buffer,
\r
251 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
252 const core::video_format_desc& format_desc)
\r
254 // Remove empty items.
\r
255 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
257 return item.transform.field_mode == core::field_mode::empty;
\r
260 // Remove first field stills.
\r
261 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
263 return item.transform.is_still && item.transform.field_mode == format_desc.field_mode; // only us last field for stills.
\r
266 if(layer.items.empty())
\r
269 std::shared_ptr<device_buffer> local_key_buffer;
\r
270 std::shared_ptr<device_buffer> local_mix_buffer;
\r
272 if(layer.blend_mode != core::blend_mode::normal)
\r
274 auto layer_draw_buffer = create_mixer_buffer(4, format_desc);
\r
276 BOOST_FOREACH(auto& item, layer.items)
\r
277 draw_item(std::move(item), layer_draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
279 draw_mixer_buffer(layer_draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
280 draw_mixer_buffer(draw_buffer, std::move(layer_draw_buffer), layer.blend_mode);
\r
284 BOOST_FOREACH(auto& item, layer.items)
\r
285 draw_item(std::move(item), draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
287 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
290 layer_key_buffer = std::move(local_key_buffer);
\r
293 void draw_item(item&& item,
\r
294 spl::shared_ptr<device_buffer>& draw_buffer,
\r
295 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
296 std::shared_ptr<device_buffer>& local_key_buffer,
\r
297 std::shared_ptr<device_buffer>& local_mix_buffer,
\r
298 const core::video_format_desc& format_desc)
\r
300 if(item.pix_desc.planes.at(0).height == 480) // NTSC DV
\r
302 item.transform.fill_translation[1] += 2.0/static_cast<double>(format_desc.height);
\r
303 item.transform.fill_scale[1] = 1.0 - 6.0*1.0/static_cast<double>(format_desc.height);
\r
306 // Fix field-order if needed
\r
307 if(item.field_mode == core::field_mode::lower && format_desc.field_mode == core::field_mode::upper)
\r
308 item.transform.fill_translation[1] += 1.0/static_cast<double>(format_desc.height);
\r
309 else if(item.field_mode == core::field_mode::upper && format_desc.field_mode == core::field_mode::lower)
\r
310 item.transform.fill_translation[1] -= 1.0/static_cast<double>(format_desc.height);
\r
312 draw_params draw_params;
\r
313 draw_params.pix_desc = std::move(item.pix_desc);
\r
314 draw_params.transform = std::move(item.transform);
\r
315 BOOST_FOREACH(auto& future_texture, item.textures)
\r
316 draw_params.textures.push_back(future_texture.get());
\r
318 if(item.transform.is_key)
\r
320 local_key_buffer = local_key_buffer ? local_key_buffer : create_mixer_buffer(1, format_desc);
\r
322 draw_params.background = local_key_buffer;
\r
323 draw_params.local_key = nullptr;
\r
324 draw_params.layer_key = nullptr;
\r
326 kernel_.draw(std::move(draw_params));
\r
328 else if(item.transform.is_mix)
\r
330 local_mix_buffer = local_mix_buffer ? local_mix_buffer : create_mixer_buffer(4, format_desc);
\r
332 draw_params.background = local_mix_buffer;
\r
333 draw_params.local_key = std::move(local_key_buffer);
\r
334 draw_params.layer_key = layer_key_buffer;
\r
336 draw_params.keyer = keyer::additive;
\r
338 kernel_.draw(std::move(draw_params));
\r
342 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
344 draw_params.background = draw_buffer;
\r
345 draw_params.local_key = std::move(local_key_buffer);
\r
346 draw_params.layer_key = layer_key_buffer;
\r
348 kernel_.draw(std::move(draw_params));
\r
352 void draw_mixer_buffer(spl::shared_ptr<device_buffer>& draw_buffer,
\r
353 std::shared_ptr<device_buffer>&& source_buffer,
\r
354 core::blend_mode blend_mode = core::blend_mode::normal)
\r
359 draw_params draw_params;
\r
360 draw_params.pix_desc.format = core::pixel_format::bgra;
\r
361 draw_params.pix_desc.planes = list_of(core::pixel_format_desc::plane(source_buffer->width(), source_buffer->height(), 4));
\r
362 draw_params.textures = list_of(source_buffer);
\r
363 draw_params.transform = core::image_transform();
\r
364 draw_params.blend_mode = blend_mode;
\r
365 draw_params.background = draw_buffer;
\r
367 kernel_.draw(std::move(draw_params));
\r
370 spl::shared_ptr<device_buffer> create_mixer_buffer(int stride, const core::video_format_desc& format_desc)
\r
372 auto buffer = ogl_->create_device_buffer(format_desc.width, format_desc.height, stride);
\r
378 struct image_mixer::impl : boost::noncopyable
\r
380 spl::shared_ptr<device> ogl_;
\r
381 image_renderer renderer_;
\r
382 std::vector<core::image_transform> transform_stack_;
\r
383 std::vector<layer> layers_; // layer/stream/items
\r
385 impl(const spl::shared_ptr<device>& ogl)
\r
388 , transform_stack_(1)
\r
390 CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer";
\r
393 void begin_layer(core::blend_mode blend_mode)
\r
395 layers_.push_back(layer(std::vector<item>(), blend_mode));
\r
398 void push(const core::frame_transform& transform)
\r
400 transform_stack_.push_back(transform_stack_.back()*transform.image_transform);
\r
403 void visit(const core::data_frame& frame2)
\r
405 auto frame = dynamic_cast<const ogl::data_frame*>(&frame2);
\r
406 if(frame == nullptr)
\r
409 if(frame->pixel_format_desc().format == core::pixel_format::invalid)
\r
412 if(frame->buffers().empty())
\r
415 if(transform_stack_.back().field_mode == core::field_mode::empty)
\r
419 item.pix_desc = frame->pixel_format_desc();
\r
420 item.field_mode = frame->field_mode();
\r
421 item.buffers = frame->buffers();
\r
422 item.transform = transform_stack_.back();
\r
424 layers_.back().items.push_back(item);
\r
429 transform_stack_.pop_back();
\r
436 boost::shared_future<boost::iterator_range<const uint8_t*>> render(const core::video_format_desc& format_desc)
\r
438 // Remove empty layers.
\r
439 boost::range::remove_erase_if(layers_, [](const layer& layer)
\r
441 return layer.items.empty();
\r
444 return renderer_(std::move(layers_), format_desc);
\r
447 virtual spl::unique_ptr<core::data_frame> create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode)
\r
449 return spl::make_unique<ogl::data_frame>(ogl_, tag, desc, frame_rate, field_mode);
\r
453 image_mixer::image_mixer(const spl::shared_ptr<device>& ogl) : impl_(new impl(ogl)){}
\r
454 void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);}
\r
455 void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);}
\r
456 void image_mixer::pop(){impl_->pop();}
\r
457 boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}
\r
458 void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}
\r
459 void image_mixer::end_layer(){impl_->end_layer();}
\r
460 spl::unique_ptr<core::data_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) {return impl_->create_frame(tag, desc, frame_rate, field_mode);}
\r