]> git.sesse.net Git - casparcg/blob - accelerator/ogl/image/image_mixer.cpp
2.1.0: Refactored away "write_frame", only "data_frame" and "draw_frame" are needed.
[casparcg] / accelerator / ogl / image / image_mixer.cpp
1 /*\r
2 * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>\r
3 *\r
4 * This file is part of CasparCG (www.casparcg.com).\r
5 *\r
6 * CasparCG is free software: you can redistribute it and/or modify\r
7 * it under the terms of the GNU General Public License as published by\r
8 * the Free Software Foundation, either version 3 of the License, or\r
9 * (at your option) any later version.\r
10 *\r
11 * CasparCG is distributed in the hope that it will be useful,\r
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
14 * GNU General Public License for more details.\r
15 *\r
16 * You should have received a copy of the GNU General Public License\r
17 * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.\r
18 *\r
19 * Author: Robert Nagy, ronag89@gmail.com\r
20 */\r
21 \r
22 #include "../../stdafx.h"\r
23 \r
24 #include "image_mixer.h"\r
25 \r
26 #include "image_kernel.h"\r
27 \r
28 #include "../util/data_frame.h"\r
29 #include "../util/context.h"\r
30 #include "../util/host_buffer.h"\r
31 #include "../util/device_buffer.h"\r
32 \r
33 #include <common/gl/gl_check.h>\r
34 #include <common/concurrency/async.h>\r
35 #include <common/memory/memcpy.h>\r
36 \r
37 #include <core/frame/data_frame.h>\r
38 #include <core/frame/frame_transform.h>\r
39 #include <core/frame/pixel_format.h>\r
40 #include <core/video_format.h>\r
41 \r
42 #include <asmlib.h>\r
43 \r
44 #include <gl/glew.h>\r
45 \r
46 #include <boost/foreach.hpp>\r
47 #include <boost/range/algorithm_ext/erase.hpp>\r
48 #include <boost/thread/future.hpp>\r
49 \r
50 #include <algorithm>\r
51 #include <vector>\r
52 \r
53 using namespace boost::assign;\r
54 \r
55 namespace caspar { namespace accelerator { namespace ogl {\r
56                 \r
57 typedef boost::shared_future<spl::shared_ptr<device_buffer>> future_texture;\r
58 \r
// A single renderable source frame together with the transform that
// positions it. buffers holds the CPU-side pixel planes until they have
// been scheduled for upload, after which textures carries the pending
// GPU copies and buffers is cleared.
struct item
{
	core::pixel_format_desc                                         pix_desc;	// Pixel layout of the planes in buffers.
	core::field_mode                                                        field_mode;	// Field mode of the source frame.
	std::vector<spl::shared_ptr<host_buffer>>       buffers;	// CPU-side planes (cleared once uploads start).
	std::vector<future_texture>                                     textures;	// Pending device-side copies of the planes.
	core::image_transform                                           transform;	// Accumulated transform applied when drawing.

	// Default-constructs an invalid/empty item.
	item()
		: pix_desc(core::pixel_format::invalid)
		, field_mode(core::field_mode::empty)
	{
	}
};
73 \r
74 bool operator==(const item& lhs, const item& rhs)\r
75 {\r
76         return lhs.buffers == rhs.buffers && lhs.transform == rhs.transform;\r
77 }\r
78 \r
79 bool operator!=(const item& lhs, const item& rhs)\r
80 {\r
81         return !(lhs == rhs);\r
82 }\r
83 \r
// An ordered collection of items that are composited together and then
// blended onto the output with a single blend mode.
struct layer
{
	std::vector<item>       items;		// Items drawn in order, back to front.
	core::blend_mode        blend_mode;	// How the finished layer is blended onto the target.

	// Creates an empty layer with normal blending.
	layer()
		: blend_mode(core::blend_mode::normal)
	{
	}

	// Creates a layer that takes ownership of the given items.
	layer(std::vector<item> items, core::blend_mode blend_mode)
		: items(std::move(items))
		, blend_mode(blend_mode)
	{
	}
};
100 \r
101 bool operator==(const layer& lhs, const layer& rhs)\r
102 {\r
103         return lhs.items == rhs.items && lhs.blend_mode == rhs.blend_mode;\r
104 }\r
105 \r
106 bool operator!=(const layer& lhs, const layer& rhs)\r
107 {\r
108         return !(lhs == rhs);\r
109 }\r
110 \r
111 class image_renderer\r
112 {\r
113         spl::shared_ptr<context>                                                                                                                                                ogl_;\r
114         image_kernel                                                                                                                                                                    kernel_;\r
115         std::pair<std::vector<layer>, boost::shared_future<boost::iterator_range<const uint8_t*>>>              last_image_;    \r
116 public:\r
117         image_renderer(const spl::shared_ptr<context>& ogl)\r
118                 : ogl_(ogl)\r
119                 , kernel_(ogl_)\r
120         {\r
121         }\r
122         \r
123         boost::shared_future<boost::iterator_range<const uint8_t*>> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)\r
124         {       \r
125                 if(last_image_.first == layers && last_image_.second.has_value())\r
126                         return last_image_.second;\r
127 \r
128                 auto image      = render(layers, format_desc);\r
129                 last_image_ = std::make_pair(std::move(layers), image);\r
130                 return image;\r
131         }\r
132 \r
133 private:        \r
134         boost::shared_future<boost::iterator_range<const uint8_t*>> render(std::vector<layer> layers, const core::video_format_desc& format_desc)\r
135         {       \r
136                 static const auto empty = spl::make_shared<const std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(2048*2048*4, 0);\r
137                 CASPAR_VERIFY(empty->size() >= format_desc.size);\r
138                 \r
139                 if(layers.empty())\r
140                 { // Bypass GPU with empty frame.\r
141                         return async(launch_policy::deferred, [=]\r
142                         {\r
143                                 return boost::iterator_range<const uint8_t*>(empty->data(), empty->data() + format_desc.size);\r
144                         });\r
145                 }\r
146                 else if(has_uswc_memcpy() &&                            \r
147                                 layers.size()                           == 1 &&\r
148                             layers.at(0).items.size()   == 1 &&\r
149                            (kernel_.has_blend_modes() && layers.at(0).blend_mode != core::blend_mode::normal) == false &&\r
150                             layers.at(0).items.at(0).pix_desc.format            == core::pixel_format::bgra &&\r
151                             layers.at(0).items.at(0).buffers.at(0)->size() == format_desc.size &&\r
152                             layers.at(0).items.at(0).transform                          == core::image_transform())\r
153                 { // Bypass GPU using streaming loads to cachable memory.\r
154                         auto uswc_buffer = layers.at(0).items.at(0).buffers.at(0);\r
155                         auto buffer              = std::make_shared<std::vector<uint8_t, tbb::cache_aligned_allocator<uint8_t>>>(uswc_buffer->size());\r
156 \r
157                         uswc_memcpy(buffer->data(), uswc_buffer->data(), uswc_buffer->size());\r
158 \r
159                         return async(launch_policy::deferred, [=]\r
160                         {\r
161                                 return boost::iterator_range<const uint8_t*>(buffer->data(), buffer->data() + buffer->size());\r
162                         });\r
163                 }\r
164                 else\r
165                 {       \r
166                         // Start host->device transfers.\r
167 \r
168                         std::map<const host_buffer*, future_texture> buffer_map;\r
169 \r
170                         BOOST_FOREACH(auto& layer, layers)\r
171                         {\r
172                                 BOOST_FOREACH(auto& item, layer.items)\r
173                                 {\r
174                                         auto host_buffers = boost::get<std::vector<spl::shared_ptr<host_buffer>>>(item.buffers);\r
175                                         auto textures     = std::vector<future_texture>();\r
176 \r
177                                         for(size_t n = 0; n < host_buffers.size(); ++n) \r
178                                         {\r
179                                                 auto buffer     = host_buffers[n];\r
180                                                 auto it         = buffer_map.find(buffer.get());\r
181                                                 if(it == buffer_map.end())\r
182                                                 {\r
183                                                         auto plane                      = item.pix_desc.planes[n];\r
184                                                         auto future_texture     = ogl_->copy_async(buffer, plane.width, plane.height, plane.channels);\r
185                                                         it = buffer_map.insert(std::make_pair(buffer.get(), std::move(future_texture))).first;\r
186                                                 }\r
187                                                 item.textures.push_back(it->second);\r
188                                         }       \r
189                                         item.buffers.clear();\r
190                                 }\r
191                         }       \r
192                         \r
193                         // Draw\r
194                         boost::shared_future<spl::shared_ptr<host_buffer>> buffer = ogl_->begin_invoke([=]() mutable -> spl::shared_ptr<host_buffer>\r
195                         {\r
196                                 auto draw_buffer = create_mixer_buffer(4, format_desc);\r
197 \r
198                                 if(format_desc.field_mode != core::field_mode::progressive)\r
199                                 {\r
200                                         auto upper = layers;\r
201                                         auto lower = std::move(layers);\r
202 \r
203                                         BOOST_FOREACH(auto& layer, upper)\r
204                                         {\r
205                                                 BOOST_FOREACH(auto& item, layer.items)\r
206                                                         item.transform.field_mode &= core::field_mode::upper;\r
207                                         }\r
208 \r
209                                         BOOST_FOREACH(auto& layer, lower)\r
210                                         {\r
211                                                 BOOST_FOREACH(auto& item, layer.items)\r
212                                                         item.transform.field_mode &= core::field_mode::lower;\r
213                                         }\r
214 \r
215                                         draw(std::move(upper), draw_buffer, format_desc);\r
216                                         draw(std::move(lower), draw_buffer, format_desc);\r
217                                 }\r
218                                 else\r
219                                 {\r
220                                         draw(std::move(layers), draw_buffer, format_desc);\r
221                                 }\r
222                         \r
223                                 auto result = ogl_->create_host_buffer(static_cast<int>(format_desc.size), host_buffer::usage::read_only); \r
224                                 draw_buffer->copy_to(*result);                                                  \r
225                                 return result;\r
226                         });\r
227                 \r
228                         // Defer memory mapping.\r
229                         return async(launch_policy::deferred, [=]() mutable -> boost::iterator_range<const uint8_t*>\r
230                         {\r
231                                 const auto& buf = buffer.get();\r
232                                 if(!buf->data())\r
233                                         ogl_->invoke(std::bind(&host_buffer::map, std::ref(buf)), task_priority::high_priority);\r
234 \r
235                                 auto ptr = reinterpret_cast<const uint8_t*>(buf->data()); // .get() and ->data() can block calling thread, ->data() can also block OpenGL thread, defer it as long as possible.\r
236                                 return boost::iterator_range<const uint8_t*>(ptr, ptr + buffer.get()->size());\r
237                         });\r
238                 }\r
239         }\r
240         \r
241         void draw(std::vector<layer>&&                          layers, \r
242                           spl::shared_ptr<device_buffer>&       draw_buffer, \r
243                           const core::video_format_desc&        format_desc)\r
244         {\r
245                 std::shared_ptr<device_buffer> layer_key_buffer;\r
246 \r
247                 BOOST_FOREACH(auto& layer, layers)\r
248                         draw_layer(std::move(layer), draw_buffer, layer_key_buffer, format_desc);\r
249         }\r
250 \r
251         void draw_layer(layer&&                                                         layer, \r
252                                         spl::shared_ptr<device_buffer>&         draw_buffer,\r
253                                         std::shared_ptr<device_buffer>&         layer_key_buffer,\r
254                                         const core::video_format_desc&          format_desc)\r
255         {               \r
256                 // Remove empty items.\r
257                 boost::range::remove_erase_if(layer.items, [&](const item& item)\r
258                 {\r
259                         return item.transform.field_mode == core::field_mode::empty;\r
260                 });\r
261                 \r
262                 // Remove first field stills.\r
263                 boost::range::remove_erase_if(layer.items, [&](const item& item)\r
264                 {\r
265                         return item.transform.is_still && item.transform.field_mode == format_desc.field_mode; // only us last field for stills.\r
266                 });\r
267 \r
268                 if(layer.items.empty())\r
269                         return;\r
270 \r
271                 std::shared_ptr<device_buffer> local_key_buffer;\r
272                 std::shared_ptr<device_buffer> local_mix_buffer;\r
273                                 \r
274                 if(layer.blend_mode != core::blend_mode::normal)\r
275                 {\r
276                         auto layer_draw_buffer = create_mixer_buffer(4, format_desc);\r
277 \r
278                         BOOST_FOREACH(auto& item, layer.items)\r
279                                 draw_item(std::move(item), layer_draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);       \r
280                 \r
281                         draw_mixer_buffer(layer_draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);                                                    \r
282                         draw_mixer_buffer(draw_buffer, std::move(layer_draw_buffer), layer.blend_mode);\r
283                 }\r
284                 else // fast path\r
285                 {\r
286                         BOOST_FOREACH(auto& item, layer.items)          \r
287                                 draw_item(std::move(item), draw_buffer, layer_key_buffer, local_key_buffer, local_mix_buffer, format_desc);             \r
288                                         \r
289                         draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);\r
290                 }                                       \r
291 \r
292                 layer_key_buffer = std::move(local_key_buffer);\r
293         }\r
294 \r
295         void draw_item(item&&                                                   item, \r
296                                    spl::shared_ptr<device_buffer>&      draw_buffer, \r
297                                    std::shared_ptr<device_buffer>&      layer_key_buffer, \r
298                                    std::shared_ptr<device_buffer>&      local_key_buffer, \r
299                                    std::shared_ptr<device_buffer>&      local_mix_buffer,\r
300                                    const core::video_format_desc&       format_desc)\r
301         {                                                                       \r
302                 if(item.pix_desc.planes.at(0).height == 480) // NTSC DV\r
303                 {\r
304                         item.transform.fill_translation[1] += 2.0/static_cast<double>(format_desc.height);\r
305                         item.transform.fill_scale[1] = 1.0 - 6.0*1.0/static_cast<double>(format_desc.height);\r
306                 }\r
307         \r
308                 // Fix field-order if needed\r
309                 if(item.field_mode == core::field_mode::lower && format_desc.field_mode == core::field_mode::upper)\r
310                         item.transform.fill_translation[1] += 1.0/static_cast<double>(format_desc.height);\r
311                 else if(item.field_mode == core::field_mode::upper && format_desc.field_mode == core::field_mode::lower)\r
312                         item.transform.fill_translation[1] -= 1.0/static_cast<double>(format_desc.height);\r
313                 \r
314                 draw_params draw_params;\r
315                 draw_params.pix_desc    = std::move(item.pix_desc);\r
316                 draw_params.transform   = std::move(item.transform);\r
317                 BOOST_FOREACH(auto& future_texture, item.textures)\r
318                         draw_params.textures.push_back(future_texture.get());\r
319 \r
320                 if(item.transform.is_key)\r
321                 {\r
322                         local_key_buffer = local_key_buffer ? local_key_buffer : create_mixer_buffer(1, format_desc);\r
323 \r
324                         draw_params.background                  = local_key_buffer;\r
325                         draw_params.local_key                   = nullptr;\r
326                         draw_params.layer_key                   = nullptr;\r
327 \r
328                         kernel_.draw(std::move(draw_params));\r
329                 }\r
330                 else if(item.transform.is_mix)\r
331                 {\r
332                         local_mix_buffer = local_mix_buffer ? local_mix_buffer : create_mixer_buffer(4, format_desc);\r
333 \r
334                         draw_params.background                  = local_mix_buffer;\r
335                         draw_params.local_key                   = std::move(local_key_buffer);\r
336                         draw_params.layer_key                   = layer_key_buffer;\r
337 \r
338                         draw_params.keyer                               = keyer::additive;\r
339 \r
340                         kernel_.draw(std::move(draw_params));\r
341                 }\r
342                 else\r
343                 {\r
344                         draw_mixer_buffer(draw_buffer, std::move(local_mix_buffer), core::blend_mode::normal);\r
345                         \r
346                         draw_params.background                  = draw_buffer;\r
347                         draw_params.local_key                   = std::move(local_key_buffer);\r
348                         draw_params.layer_key                   = layer_key_buffer;\r
349 \r
350                         kernel_.draw(std::move(draw_params));\r
351                 }       \r
352         }\r
353 \r
354         void draw_mixer_buffer(spl::shared_ptr<device_buffer>&  draw_buffer, \r
355                                                    std::shared_ptr<device_buffer>&& source_buffer, \r
356                                                    core::blend_mode                                     blend_mode = core::blend_mode::normal)\r
357         {\r
358                 if(!source_buffer)\r
359                         return;\r
360 \r
361                 draw_params draw_params;\r
362                 draw_params.pix_desc.format             = core::pixel_format::bgra;\r
363                 draw_params.pix_desc.planes             = list_of(core::pixel_format_desc::plane(source_buffer->width(), source_buffer->height(), 4));\r
364                 draw_params.textures                    = list_of(source_buffer);\r
365                 draw_params.transform                   = core::image_transform();\r
366                 draw_params.blend_mode                  = blend_mode;\r
367                 draw_params.background                  = draw_buffer;\r
368 \r
369                 kernel_.draw(std::move(draw_params));\r
370         }\r
371                         \r
372         spl::shared_ptr<device_buffer> create_mixer_buffer(int stride, const core::video_format_desc& format_desc)\r
373         {\r
374                 auto buffer = ogl_->create_device_buffer(format_desc.width, format_desc.height, stride);\r
375                 buffer->clear();\r
376                 return buffer;\r
377         }\r
378 };\r
379                 \r
380 struct image_mixer::impl : boost::noncopyable\r
381 {       \r
382         spl::shared_ptr<context>                        ogl_;\r
383         image_renderer                                          renderer_;\r
384         std::vector<core::image_transform>      transform_stack_;\r
385         std::vector<layer>                                      layers_; // layer/stream/items\r
386 public:\r
387         impl(const spl::shared_ptr<context>& ogl) \r
388                 : ogl_(ogl)\r
389                 , renderer_(ogl)\r
390                 , transform_stack_(1)   \r
391         {\r
392                 CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer";\r
393         }\r
394 \r
395         void begin_layer(core::blend_mode blend_mode)\r
396         {\r
397                 layers_.push_back(layer(std::vector<item>(), blend_mode));\r
398         }\r
399                 \r
400         void push(const core::frame_transform& transform)\r
401         {\r
402                 transform_stack_.push_back(transform_stack_.back()*transform.image_transform);\r
403         }\r
404                 \r
405         void visit(const core::data_frame& frame2)\r
406         {                       \r
407                 auto frame = dynamic_cast<const data_frame*>(&frame2);\r
408                 if(frame == nullptr)\r
409                         return;\r
410 \r
411                 if(frame->pixel_format_desc().format == core::pixel_format::invalid)\r
412                         return;\r
413 \r
414                 if(frame->buffers().empty())\r
415                         return;\r
416 \r
417                 if(transform_stack_.back().field_mode == core::field_mode::empty)\r
418                         return;\r
419 \r
420                 item item;\r
421                 item.pix_desc                   = frame->pixel_format_desc();\r
422                 item.field_mode                 = frame->field_mode();\r
423                 item.buffers                    = frame->buffers();                             \r
424                 item.transform                  = transform_stack_.back();\r
425 \r
426                 layers_.back().items.push_back(item);\r
427         }\r
428 \r
429         void pop()\r
430         {\r
431                 transform_stack_.pop_back();\r
432         }\r
433 \r
434         void end_layer()\r
435         {               \r
436         }\r
437         \r
438         boost::shared_future<boost::iterator_range<const uint8_t*>> render(const core::video_format_desc& format_desc)\r
439         {\r
440                 // Remove empty layers.\r
441                 boost::range::remove_erase_if(layers_, [](const layer& layer)\r
442                 {\r
443                         return layer.items.empty();\r
444                 });\r
445 \r
446                 return renderer_(std::move(layers_), format_desc);\r
447         }\r
448         \r
449         virtual spl::shared_ptr<core::data_frame> create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode)\r
450         {\r
451                 return spl::make_shared<ogl::data_frame>(ogl_, tag, desc, frame_rate, field_mode);\r
452         }\r
453 };\r
454 \r
455 image_mixer::image_mixer(const spl::shared_ptr<context>& ogl) : impl_(new impl(ogl)){}\r
456 void image_mixer::push(const core::frame_transform& transform){impl_->push(transform);}\r
457 void image_mixer::visit(const core::data_frame& frame){impl_->visit(frame);}\r
458 void image_mixer::pop(){impl_->pop();}\r
459 boost::shared_future<boost::iterator_range<const uint8_t*>> image_mixer::operator()(const core::video_format_desc& format_desc){return impl_->render(format_desc);}\r
460 void image_mixer::begin_layer(core::blend_mode blend_mode){impl_->begin_layer(blend_mode);}\r
461 void image_mixer::end_layer(){impl_->end_layer();}\r
462 spl::shared_ptr<core::data_frame> image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, double frame_rate, core::field_mode field_mode) {return impl_->create_frame(tag, desc, frame_rate, field_mode);}\r
463 \r
464 }}}