/*
 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
 *
 * This file is part of CasparCG (www.casparcg.com).
 *
 * CasparCG is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CasparCG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Robert Nagy, ronag89@gmail.com
 */
#include "../../stdafx.h"

#include "image_mixer.h"

#include "image_kernel.h"

#include "../util/write_frame.h"
#include "../util/context.h"
#include "../util/host_buffer.h"
#include "../util/device_buffer.h"

#include <common/gl/gl_check.h>
#include <common/concurrency/async.h>
#include <common/memory/memcpy.h>

#include <core/frame/write_frame.h>
#include <core/frame/frame_transform.h>
#include <core/frame/pixel_format.h>
#include <core/video_format.h>

#include <modules/ffmpeg/producer/filter/filter.h>

#include <gl/glew.h>

#include <boost/foreach.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/thread/future.hpp>

#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <utility>
#include <vector>
55 using namespace boost::assign;
\r
57 namespace caspar { namespace accelerator { namespace ogl {
\r
59 typedef boost::shared_future<spl::shared_ptr<device_buffer>> future_texture;
\r
63 core::pixel_format_desc pix_desc;
\r
64 std::vector<spl::shared_ptr<host_buffer>> buffers;
\r
65 std::vector<future_texture> textures;
\r
66 core::image_transform transform;
\r
70 : pix_desc(core::pixel_format::invalid)
\r
75 bool operator==(const item& lhs, const item& rhs)
\r
77 return lhs.buffers == rhs.buffers && lhs.transform == rhs.transform;
\r
80 bool operator!=(const item& lhs, const item& rhs)
\r
82 return !(lhs == rhs);
\r
87 std::vector<item> items;
\r
88 core::blend_mode blend_mode;
\r
91 : blend_mode(core::blend_mode::normal)
\r
95 layer(std::vector<item> items, core::blend_mode blend_mode)
\r
96 : items(std::move(items))
\r
97 , blend_mode(blend_mode)
\r
102 bool operator==(const layer& lhs, const layer& rhs)
\r
104 return lhs.items == rhs.items && lhs.blend_mode == rhs.blend_mode;
\r
107 bool operator!=(const layer& lhs, const layer& rhs)
\r
109 return !(lhs == rhs);
\r
112 class image_renderer
\r
114 spl::shared_ptr<context> ogl_;
\r
115 image_kernel kernel_;
\r
116 std::pair<std::vector<layer>, boost::shared_future<boost::iterator_range<const uint8_t*>>> last_image_;
\r
117 ffmpeg::filter deinterlacer_;
\r
119 image_renderer(const spl::shared_ptr<context>& ogl)
\r
122 , deinterlacer_(L"YADIF=0:-1")
\r
126 boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
128 if(last_image_.first == layers && last_image_.second.has_value())
\r
129 return last_image_.second;
\r
131 auto image = render(layers, format_desc);
\r
132 last_image_ = std::make_pair(std::move(layers), image);
\r
137 boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<layer> layers, const core::video_format_desc& format_desc)
\r
139 static const auto empty = spl::make_shared<const std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(2048*2048*4, 0);
\r
140 CASPAR_VERIFY(empty->size() >= format_desc.size);
\r
143 { // Bypass GPU with empty frame.
\r
144 return async(launch_policy::deferred, [=]
\r
146 return boost::iterator_range<const uint8_t*>(empty->data(), empty->data() + format_desc.size);
\r
149 else if(has_uswc_memcpy() &&
\r
150 layers.size() == 1 &&
\r
151 layers.at(0).items.size() == 1 &&
\r
152 (kernel_.has_blend_modes() && layers.at(0).blend_mode != core::blend_mode::normal) == false &&
\r
153 layers.at(0).items.at(0).pix_desc.format == core::pixel_format::bgra &&
\r
154 layers.at(0).items.at(0).buffers.at(0)->size() == format_desc.size &&
\r
155 layers.at(0).items.at(0).transform == core::image_transform())
\r
156 { // Bypass GPU using streaming loads to cachable memory.
\r
157 auto uswc_buffer = layers.at(0).items.at(0).buffers.at(0);
\r
158 auto buffer = std::make_shared<std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(uswc_buffer->size());
\r
160 uswc_memcpy(buffer->data(), uswc_buffer->data(), uswc_buffer->size());
\r
162 return async(launch_policy::deferred, [=]
\r
164 return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + buffer->size());
\r
169 // Start host->device transfers.
\r
171 std::map<const host_buffer*, future_texture> buffer_map;
\r
173 BOOST_FOREACH(auto& layer, layers)
\r
175 BOOST_FOREACH(auto& item, layer.items)
\r
177 auto host_buffers = boost::get<std::vector<spl::shared_ptr<host_buffer>>>(item.buffers);
\r
178 auto textures = std::vector<future_texture>();
\r
180 for(size_t n = 0; n < host_buffers.size(); ++n)
\r
182 auto buffer = host_buffers[n];
\r
183 auto it = buffer_map.find(buffer.get());
\r
184 if(it == buffer_map.end())
\r
186 auto plane = item.pix_desc.planes[n];
\r
187 auto future_texture = ogl_->copy_async(buffer, plane.width, plane.height, plane.channels);
\r
188 it = buffer_map.insert(std::make_pair(buffer.get(), std::move(future_texture))).first;
\r
190 item.textures.push_back(it->second);
\r
192 item.buffers.clear();
\r
197 boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>
\r
199 auto draw_buffer = create_mixer_buffer(4, format_desc);
\r
201 if(format_desc.field_mode != core::field_mode::progressive)
\r
203 auto upper = layers;
\r
204 auto lower = std::move(layers);
\r
206 BOOST_FOREACH(auto& layer, upper)
\r
208 BOOST_FOREACH(auto& item, layer.items)
\r
209 item.transform.field_mode &= core::field_mode::upper;
\r
212 BOOST_FOREACH(auto& layer, lower)
\r
214 BOOST_FOREACH(auto& item, layer.items)
\r
215 item.transform.field_mode &= core::field_mode::lower;
\r
218 draw(std::move(upper), draw_buffer, format_desc);
\r
219 draw(std::move(lower), draw_buffer, format_desc);
\r
223 draw(std::move(layers), draw_buffer, format_desc);
\r
226 auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only);
\r
227 draw_buffer->copy_to(*result);
\r
231 // Defer memory mapping.
\r
232 return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>
\r
234 const auto& buf = buffer.get();
\r
236 ogl_->invoke(std::bind(&host_buffer::map, std::ref(buf)), task_priority::high_priority);
\r
238 auto ptr = reinterpret_cast<const uint8_t*>(buf->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.
\r
239 return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());
\r
244 void draw(std::vector<layer>&& layers,
\r
245 spl::shared_ptr<device_buffer>& draw_buffer,
\r
246 const core::video_format_desc& format_desc)
\r
248 std::shared_ptr<device_buffer> layer_key_buffer;
\r
250 BOOST_FOREACH(auto& layer, layers)
\r
251 draw_layer(std::move(layer), draw_buffer, layer_key_buffer, format_desc);
\r
254 void draw_layer(layer&& layer,
\r
255 spl::shared_ptr<device_buffer>& draw_buffer,
\r
256 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
257 const core::video_format_desc& format_desc)
\r
259 // Remove empty items.
\r
260 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
262 return item.transform.field_mode == core::field_mode::empty;
\r
265 // Remove first field stills.
\r
266 boost::range::remove_erase_if(layer.items, [&](const item& item)
\r
268 return item.transform.is_still && item.transform.field_mode == format_desc.field_mode; // only us last field for stills.
\r
271 if(layer.items.empty())
\r
274 std::shared_ptr<device_buffer> local_key_buffer;
\r
275 std::shared_ptr<device_buffer> local_mix_buffer;
\r
277 if(layer.blend_mode != core::blend_mode::normal)
\r
279 auto layer_draw_buffer = create_mixer_buffer(4, format_desc);
\r
281 BOOST_FOREACH(auto& item, layer.items)
\r
282 draw_item(std::move(item), layer_draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
284 draw_mixer_buffer(layer_draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
285 draw_mixer_buffer(draw_buffer, std::move(layer_draw_buffer), layer.blend_mode);
\r
289 BOOST_FOREACH(auto& item, layer.items)
\r
290 draw_item(std::move(item), draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);
\r
292 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
295 layer_key_buffer = std::move(local_key_buffer);
\r
298 void draw_item(item&& item,
\r
299 spl::shared_ptr<device_buffer>& draw_buffer,
\r
300 std::shared_ptr<device_buffer>& layer_key_buffer,
\r
301 std::shared_ptr<device_buffer>& local_key_buffer,
\r
302 std::shared_ptr<device_buffer>& local_mix_buffer,
\r
303 const core::video_format_desc& format_desc)
\r
305 draw_params draw_params;
\r
306 draw_params.pix_desc = std::move(item.pix_desc);
\r
307 draw_params.transform = std::move(item.transform);
\r
308 BOOST_FOREACH(auto& future_texture, item.textures)
\r
309 draw_params.textures.push_back(future_texture.get());
\r
311 if(item.transform.is_key)
\r
313 local_key_buffer = local_key_buffer ? local_key_buffer : create_mixer_buffer(1, format_desc);
\r
315 draw_params.background = local_key_buffer;
\r
316 draw_params.local_key = nullptr;
\r
317 draw_params.layer_key = nullptr;
\r
319 kernel_.draw(std::move(draw_params));
\r
321 else if(item.transform.is_mix)
\r
323 local_mix_buffer = local_mix_buffer ? local_mix_buffer : create_mixer_buffer(4, format_desc);
\r
325 draw_params.background = local_mix_buffer;
\r
326 draw_params.local_key = std::move(local_key_buffer);
\r
327 draw_params.layer_key = layer_key_buffer;
\r
329 draw_params.keyer = keyer::additive;
\r
331 kernel_.draw(std::move(draw_params));
\r
335 draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);
\r
337 draw_params.background = draw_buffer;
\r
338 draw_params.local_key = std::move(local_key_buffer);
\r
339 draw_params.layer_key = layer_key_buffer;
\r
341 kernel_.draw(std::move(draw_params));
\r
345 void draw_mixer_buffer(spl::shared_ptr<device_buffer>& draw_buffer,
\r
346 std::shared_ptr<device_buffer>&& source_buffer,
\r
347 core::blend_mode blend_mode = core::blend_mode::normal)
\r
352 draw_params draw_params;
\r
353 draw_params.pix_desc.format = core::pixel_format::bgra;
\r
354 draw_params.pix_desc.planes = list_of(core::pixel_format_desc::plane(source_buffer->width(), source_buffer->height(), 4));
\r
355 draw_params.textures = list_of(source_buffer);
\r
356 draw_params.transform = core::image_transform();
\r
357 draw_params.blend_mode = blend_mode;
\r
358 draw_params.background = draw_buffer;
\r
360 kernel_.draw(std::move(draw_params));
\r
363 spl::shared_ptr<device_buffer> create_mixer_buffer(int stride, const core::video_format_desc& format_desc)
\r
365 auto buffer = ogl_->create_device_buffer(format_desc.width, format_desc.height, stride);
\r
371 struct image_mixer::impl : boost::noncopyable
\r
373 spl::shared_ptr<context> ogl_;
\r
374 image_renderer renderer_;
\r
375 std::vector<core::image_transform> transform_stack_;
\r
376 std::vector<layer> layers_; // layer/stream/items
\r
378 impl(const spl::shared_ptr<context>& ogl)
\r
381 , transform_stack_(1)
\r
383 CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer";
\r
386 void begin_layer(core::blend_mode blend_mode)
\r
388 layers_.push_back(layer(std::vector<item>(), blend_mode));
\r
391 void push(const core::frame_transform& transform)
\r
393 transform_stack_.push_back(transform_stack_.back()*transform.image_transform);
\r
396 void visit(const core::data_frame& frame2)
\r
398 auto frame = dynamic_cast<const write_frame*>(&frame2);
\r
399 if(frame == nullptr)
\r
402 if(frame->get_pixel_format_desc().format == core::pixel_format::invalid)
\r
405 if(frame->get_buffers().empty())
\r
408 if(transform_stack_.back().field_mode == core::field_mode::empty)
\r
412 item.pix_desc = frame->get_pixel_format_desc();
\r
413 item.buffers = frame->get_buffers();
\r
414 item.transform = transform_stack_.back();
\r
416 layers_.back().items.push_back(item);
\r
421 transform_stack_.pop_back();
\r
428 boost::shared_future<boost::iterator_range<const uint8_t*>> render(const core::video_format_desc& format_desc)
\r
430 // Remove empty layers.
\r
431 boost::range::remove_erase_if(layers_, [](const layer& layer)
\r
433 return layer.items.empty();
\r
436 return renderer_(std::move(layers_), format_desc);
\r
439 virtual spl::shared_ptr<ogl::write_frame> create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode)
\r
441 return spl::make_shared<ogl::write_frame>(ogl_, tag, desc, frame_rate, field_mode);
\r
445 image_mixer::image_mixer(const spl::shared_ptr<context>& ogl) : impl_(new impl(ogl)){}
\r
446 void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);}
\r
447 void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);}
\r
448 void image_mixer::pop(){impl_->pop();}
\r
449 boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}
\r
450 void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}
\r
451 void image_mixer::end_layer(){impl_->end_layer();}
\r
452 spl::shared_ptr<core::write_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) {return impl_->create_frame(tag, desc, frame_rate, field_mode);}
\r