git.sesse.net Git - casparcg/blob - core/frame/gpu_frame_processor.cpp

   1 #include "../StdAfx.h"\r
   2 \r
   3 #include "gpu_frame_processor.h"\r
   4 \r
   5 #include "gpu_frame.h"\r
   6 #include "composite_gpu_frame.h"\r
   7 #include "frame_format.h"\r
   8 \r
   9 #include "../../common/exception/exceptions.h"\r
  10 #include "../../common/concurrency/executor.h"\r
  11 #include "../../common/utility/memory.h"\r
  12 #include "../../common/gl/utility.h"\r
  13 \r
  14 #include <Glee.h>\r
  15 #include <SFML/Window.hpp>\r
  16 \r
  17 #include <tbb/concurrent_queue.h>\r
  18 #include <tbb/concurrent_unordered_map.h>\r
  19 \r
  20 #include <boost/lexical_cast.hpp>\r
  21 #include <boost/thread/once.hpp>\r
  22 #include <boost/thread.hpp>\r
  23 #include <boost/range.hpp>\r
  24 #include <boost/foreach.hpp>\r
  25 #include <boost/range/algorithm_ext/erase.hpp>\r
  26 #include <boost/range/algorithm.hpp>\r
  27 \r
  28 #include <functional>\r
  29 #include <unordered_map>\r
  30 #include <numeric>\r
  31 \r
  32 namespace caspar {\r
  33         \r
  34 class frame_buffer\r
  35 {\r
  36 public:\r
  37         frame_buffer(size_t width, size_t height)\r
  38         {\r
  39                 CASPAR_GL_CHECK(glGenTextures(1, &texture_));   \r
  40 \r
  41                 CASPAR_GL_CHECK(glBindTexture(GL_TEXTURE_2D, texture_));\r
  42 \r
  43                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR));\r
  44                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR));\r
  45                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));\r
  46                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));\r
  47 \r
  48                 CASPAR_GL_CHECK(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL));\r
  49 \r
  50                 glGenFramebuffersEXT(1, &fbo_);\r
  51                 \r
  52                 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo_);\r
  53                 glBindTexture(GL_TEXTURE_2D, texture_);\r
  54                 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, texture_, 0);\r
  55         }\r
  56 \r
  57         ~frame_buffer()\r
  58         {\r
  59                 glDeleteFramebuffersEXT(1, &fbo_);\r
  60         }\r
  61                 \r
  62         GLuint handle() { return fbo_; }\r
  63         GLenum attachement() { return GL_COLOR_ATTACHMENT0_EXT; }\r
  64         \r
  65 private:\r
  66         GLuint texture_;\r
  67         GLuint fbo_;\r
  68 };\r
  69 typedef std::shared_ptr<frame_buffer> frame_buffer_ptr;\r
  70 \r
  71 struct gpu_frame_processor::implementation\r
  72 {       \r
  73         implementation(const frame_format_desc& format_desc) : format_desc_(format_desc)\r
  74         {               \r
  75                 executor_.start();\r
  76                 executor_.begin_invoke([=]\r
  77                 {\r
  78                         context_.reset(new sf::Context());\r
  79                         context_->SetActive(true);\r
  80                         glEnable(GL_POLYGON_STIPPLE);\r
  81                         glEnable(GL_TEXTURE_2D);\r
  82                         glEnable(GL_BLEND);\r
  83                         glDisable(GL_DEPTH_TEST);\r
  84                         glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);                      \r
  85                         glClearColor(0.0, 0.0, 0.0, 0.0);\r
  86                         glViewport(0, 0, format_desc_.width, format_desc_.height);\r
  87                         glLoadIdentity();\r
  88 \r
  89                         input_.resize(2, std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height));\r
  90                         writing_.resize(2, std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height));\r
  91                         fbo_ = std::make_shared<frame_buffer>(format_desc_.width, format_desc_.height);\r
  92                         output_frame_ = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
  93                         index_ = 0;\r
  94                 });\r
  95 \r
  96                 empty_frame_ = create_frame(format_desc.width, format_desc.height);\r
  97                 common::clear(empty_frame_->data(), empty_frame_->size());\r
  98                 for(int n = 0; n < 3; ++n)\r
  99                         finished_frames_.push(empty_frame_);\r
 100         }\r
 101 \r
 102         ~implementation()\r
 103         {\r
 104                 finished_frames_.push(nullptr);\r
 105                 executor_.stop();\r
 106         }\r
 107                 \r
 108         void pop(gpu_frame_ptr& frame)\r
 109         {\r
 110                 finished_frames_.pop(frame);\r
 111         }       \r
 112         \r
 113         void composite(std::vector<gpu_frame_ptr> frames)\r
 114         {\r
 115                 boost::range::remove_erase(frames, nullptr);\r
 116                 boost::range::remove_erase(frames, gpu_frame::null());\r
 117                 auto composite_frame = std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height);\r
 118                 boost::range::for_each(frames, std::bind(&composite_gpu_frame::add, composite_frame, std::placeholders::_1));\r
 119 \r
 120                 executor_.begin_invoke([=]\r
 121                 {\r
 122                         try\r
 123                         {\r
 124                                 index_ = (index_ + 1) % 2;\r
 125                                 int next_index = (index_ + 1) % 2;\r
 126 \r
 127                                 // 2. Start asynchronous DMA transfer to video memory\r
 128                                 // Lock frames and give pointer ownership to OpenGL                     \r
 129                                 input_[index_]->write_lock();\r
 130                                 writing_[index_] = std::move(input_[index_]);   \r
 131                                 \r
 132                                 // 1. Copy to page-locked memory\r
 133                                 input_[next_index] = std::move(composite_frame);\r
 134                                                                 \r
 135                                 // 4. Output to external buffer\r
 136                                 if(output_frame_->read_unlock())\r
 137                                         finished_frames_.push(output_frame_);\r
 138                 \r
 139                                 // 3. Draw to framebuffer and start asynchronous DMA transfer to page-locked memory                             \r
 140                                 // Clear framebuffer\r
 141                                 glClear(GL_COLOR_BUFFER_BIT);   \r
 142                                 writing_[next_index]->draw();\r
 143                                 \r
 144                                 // Create an output frame\r
 145                                 output_frame_ = create_output_frame();\r
 146                         \r
 147                                 // Read from framebuffer into page-locked memory\r
 148                                 output_frame_->read_lock(GL_COLOR_ATTACHMENT0_EXT);\r
 149                                 output_frame_->audio_data() = std::move(writing_[next_index]->audio_data());\r
 150 \r
 151                                 // Unlock frames and give back pointer ownership\r
 152                                 writing_[next_index]->write_unlock();\r
 153                                 \r
 154                                 // Return frames to pool\r
 155                                 writing_[next_index].reset();\r
 156                         }\r
 157                         catch(...)\r
 158                         {\r
 159                                 CASPAR_LOG_CURRENT_EXCEPTION();\r
 160                         }\r
 161                 });     \r
 162         }\r
 163 \r
 164         gpu_frame_ptr create_output_frame()\r
 165         {       \r
 166                 gpu_frame_ptr frame;\r
 167                 if(!out_frame_pool_.try_pop(frame))                             \r
 168                         frame = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
 169 \r
 170                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
 171                 {\r
 172                         frame->reset();\r
 173                         out_frame_pool_.push(frame);\r
 174                 });\r
 175         }\r
 176                 \r
 177         gpu_frame_ptr create_frame(size_t width, size_t height)\r
 178         {\r
 179                 size_t key = width | (height << 16);\r
 180                 auto& pool = input_frame_pools_[key];\r
 181                 \r
 182                 gpu_frame_ptr frame;\r
 183                 if(!pool.try_pop(frame))\r
 184                 {\r
 185                         frame = executor_.invoke([=]() -> gpu_frame_ptr\r
 186                         {\r
 187                                 auto frame = std::make_shared<gpu_frame>(width, height);\r
 188                                 frame->write_unlock();\r
 189                                 return frame;\r
 190                         });\r
 191                 }\r
 192                 \r
 193                 auto destructor = [=]\r
 194                 {\r
 195                         frame->reset();\r
 196                         input_frame_pools_[key].push(frame);\r
 197                 };\r
 198 \r
 199                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
 200                 {\r
 201                         executor_.begin_invoke(destructor);\r
 202                 });\r
 203         }\r
 204                         \r
 205         tbb::concurrent_unordered_map<size_t, tbb::concurrent_bounded_queue<gpu_frame_ptr>> input_frame_pools_;\r
 206 \r
 207         tbb::concurrent_bounded_queue<gpu_frame_ptr> out_frame_pool_;\r
 208                 \r
 209         frame_buffer_ptr fbo_;\r
 210 \r
 211         int index_;\r
 212         std::vector<composite_gpu_frame_ptr>                    input_;\r
 213         std::vector<composite_gpu_frame_ptr>                    writing_;\r
 214 \r
 215         gpu_frame_ptr                                                                   output_frame_;\r
 216         tbb::concurrent_bounded_queue<gpu_frame_ptr>    finished_frames_;\r
 217                         \r
 218         frame_format_desc format_desc_;\r
 219         \r
 220         std::unique_ptr<sf::Context> context_;\r
 221         common::executor executor_;\r
 222 \r
 223         gpu_frame_ptr empty_frame_;\r
 224 };\r
 225         \r
 226 gpu_frame_processor::gpu_frame_processor(const frame_format_desc& format_desc) : impl_(new implementation(format_desc)){}\r
 227 void gpu_frame_processor::push(const std::vector<gpu_frame_ptr>& frames){ impl_->composite(frames);}\r
 228 void gpu_frame_processor::pop(gpu_frame_ptr& frame){ impl_->pop(frame);}\r
 229 gpu_frame_ptr gpu_frame_processor::create_frame(size_t width, size_t height){return impl_->create_frame(width, height);}\r
 230 \r
 231 }