]> git.sesse.net Git - casparcg/blob - core/frame/gpu_frame_processor.cpp
2.0.0.2:
[casparcg] / core / frame / gpu_frame_processor.cpp
1 #include "../StdAfx.h"\r
2 \r
3 #include "gpu_frame_processor.h"\r
4 \r
5 #include "gpu_frame.h"\r
6 #include "composite_gpu_frame.h"\r
7 #include "frame_format.h"\r
8 \r
9 #include "../../common/exception/exceptions.h"\r
10 #include "../../common/concurrency/executor.h"\r
11 #include "../../common/utility/memory.h"\r
12 #include "../../common/gl/utility.h"\r
13 \r
14 #include <Glee.h>\r
15 #include <SFML/Window.hpp>\r
16 \r
17 #include <tbb/concurrent_queue.h>\r
18 #include <tbb/concurrent_unordered_map.h>\r
19 \r
20 #include <boost/lexical_cast.hpp>\r
21 #include <boost/thread/once.hpp>\r
22 #include <boost/thread.hpp>\r
23 #include <boost/range.hpp>\r
24 #include <boost/foreach.hpp>\r
25 #include <boost/range/algorithm_ext/erase.hpp>\r
26 #include <boost/range/algorithm.hpp>\r
27 \r
28 #include <functional>\r
29 #include <unordered_map>\r
30 #include <numeric>\r
31 \r
32 namespace caspar {\r
33         \r
34 class frame_buffer\r
35 {\r
36 public:\r
37         frame_buffer(size_t width, size_t height)\r
38         {\r
39                 CASPAR_GL_CHECK(glGenTextures(1, &texture_));   \r
40 \r
41                 CASPAR_GL_CHECK(glBindTexture(GL_TEXTURE_2D, texture_));\r
42 \r
43                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR));\r
44                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR));\r
45                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));\r
46                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));\r
47 \r
48                 CASPAR_GL_CHECK(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL));\r
49 \r
50                 glGenFramebuffersEXT(1, &fbo_);\r
51                 \r
52                 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo_);\r
53                 glBindTexture(GL_TEXTURE_2D, texture_);\r
54                 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, texture_, 0);\r
55         }\r
56 \r
57         ~frame_buffer()\r
58         {\r
59                 glDeleteFramebuffersEXT(1, &fbo_);\r
60         }\r
61                 \r
62         GLuint handle() { return fbo_; }\r
63         GLenum attachement() { return GL_COLOR_ATTACHMENT0_EXT; }\r
64         \r
65 private:\r
66         GLuint texture_;\r
67         GLuint fbo_;\r
68 };\r
69 typedef std::shared_ptr<frame_buffer> frame_buffer_ptr;\r
70 \r
71 struct gpu_frame_processor::implementation\r
72 {       \r
73         implementation(const frame_format_desc& format_desc) : format_desc_(format_desc)\r
74         {               \r
75                 executor_.start();\r
76                 executor_.begin_invoke([=]\r
77                 {\r
78                         context_.reset(new sf::Context());\r
79                         context_->SetActive(true);\r
80                         glEnable(GL_POLYGON_STIPPLE);\r
81                         glEnable(GL_TEXTURE_2D);\r
82                         glEnable(GL_BLEND);\r
83                         glDisable(GL_DEPTH_TEST);\r
84                         glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);                      \r
85                         glClearColor(0.0, 0.0, 0.0, 0.0);\r
86                         glViewport(0, 0, format_desc_.width, format_desc_.height);\r
87                         glLoadIdentity();\r
88 \r
89                         input_.resize(2, std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height));\r
90                         writing_.resize(2, std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height));\r
91                         fbo_ = std::make_shared<frame_buffer>(format_desc_.width, format_desc_.height);\r
92                         output_frame_ = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
93                         index_ = 0;\r
94                 });\r
95 \r
96                 empty_frame_ = create_frame(format_desc.width, format_desc.height);\r
97                 common::clear(empty_frame_->data(), empty_frame_->size());\r
98                 for(int n = 0; n < 3; ++n)\r
99                         finished_frames_.push(empty_frame_);\r
100         }\r
101 \r
102         ~implementation()\r
103         {\r
104                 finished_frames_.push(nullptr);\r
105                 executor_.stop();\r
106         }\r
107                 \r
108         void pop(gpu_frame_ptr& frame)\r
109         {\r
110                 finished_frames_.pop(frame);\r
111         }       \r
112         \r
113         void composite(std::vector<gpu_frame_ptr> frames)\r
114         {\r
115                 boost::range::remove_erase(frames, nullptr);\r
116                 boost::range::remove_erase(frames, gpu_frame::null());\r
117                 auto composite_frame = std::make_shared<composite_gpu_frame>(format_desc_.width, format_desc_.height);\r
118                 boost::range::for_each(frames, std::bind(&composite_gpu_frame::add, composite_frame, std::placeholders::_1));\r
119 \r
120                 executor_.begin_invoke([=]\r
121                 {\r
122                         try\r
123                         {\r
124                                 index_ = (index_ + 1) % 2;\r
125                                 int next_index = (index_ + 1) % 2;\r
126 \r
127                                 // 2. Start asynchronous DMA transfer to video memory\r
128                                 // Lock frames and give pointer ownership to OpenGL                     \r
129                                 input_[index_]->write_lock();\r
130                                 writing_[index_] = std::move(input_[index_]);   \r
131                                 \r
132                                 // 1. Copy to page-locked memory\r
133                                 input_[next_index] = std::move(composite_frame);\r
134                                                                 \r
135                                 // 4. Output to external buffer\r
136                                 if(output_frame_->read_unlock())\r
137                                         finished_frames_.push(output_frame_);\r
138                 \r
139                                 // 3. Draw to framebuffer and start asynchronous DMA transfer to page-locked memory                             \r
140                                 // Clear framebuffer\r
141                                 glClear(GL_COLOR_BUFFER_BIT);   \r
142                                 writing_[next_index]->draw();\r
143                                 \r
144                                 // Create an output frame\r
145                                 output_frame_ = create_output_frame();\r
146                         \r
147                                 // Read from framebuffer into page-locked memory\r
148                                 output_frame_->read_lock(GL_COLOR_ATTACHMENT0_EXT);\r
149                                 output_frame_->audio_data() = std::move(writing_[next_index]->audio_data());\r
150 \r
151                                 // Unlock frames and give back pointer ownership\r
152                                 writing_[next_index]->write_unlock();\r
153                                 \r
154                                 // Return frames to pool\r
155                                 writing_[next_index].reset();\r
156                         }\r
157                         catch(...)\r
158                         {\r
159                                 CASPAR_LOG_CURRENT_EXCEPTION();\r
160                         }\r
161                 });     \r
162         }\r
163 \r
164         gpu_frame_ptr create_output_frame()\r
165         {       \r
166                 gpu_frame_ptr frame;\r
167                 if(!out_frame_pool_.try_pop(frame))                             \r
168                         frame = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
169 \r
170                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
171                 {\r
172                         frame->reset();\r
173                         out_frame_pool_.push(frame);\r
174                 });\r
175         }\r
176                 \r
177         gpu_frame_ptr create_frame(size_t width, size_t height)\r
178         {\r
179                 size_t key = width | (height << 16);\r
180                 auto& pool = input_frame_pools_[key];\r
181                 \r
182                 gpu_frame_ptr frame;\r
183                 if(!pool.try_pop(frame))\r
184                 {\r
185                         frame = executor_.invoke([=]() -> gpu_frame_ptr\r
186                         {\r
187                                 auto frame = std::make_shared<gpu_frame>(width, height);\r
188                                 frame->write_unlock();\r
189                                 return frame;\r
190                         });\r
191                 }\r
192                 \r
193                 auto destructor = [=]\r
194                 {\r
195                         frame->reset();\r
196                         input_frame_pools_[key].push(frame);\r
197                 };\r
198 \r
199                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
200                 {\r
201                         executor_.begin_invoke(destructor);\r
202                 });\r
203         }\r
204                         \r
205         tbb::concurrent_unordered_map<size_t, tbb::concurrent_bounded_queue<gpu_frame_ptr>> input_frame_pools_;\r
206 \r
207         tbb::concurrent_bounded_queue<gpu_frame_ptr> out_frame_pool_;\r
208                 \r
209         frame_buffer_ptr fbo_;\r
210 \r
211         int index_;\r
212         std::vector<composite_gpu_frame_ptr>                    input_;\r
213         std::vector<composite_gpu_frame_ptr>                    writing_;\r
214 \r
215         gpu_frame_ptr                                                                   output_frame_;\r
216         tbb::concurrent_bounded_queue<gpu_frame_ptr>    finished_frames_;\r
217                         \r
218         frame_format_desc format_desc_;\r
219         \r
220         std::unique_ptr<sf::Context> context_;\r
221         common::executor executor_;\r
222 \r
223         gpu_frame_ptr empty_frame_;\r
224 };\r
225         \r
226 gpu_frame_processor::gpu_frame_processor(const frame_format_desc& format_desc) : impl_(new implementation(format_desc)){}\r
227 void gpu_frame_processor::push(const std::vector<gpu_frame_ptr>& frames){ impl_->composite(frames);}\r
228 void gpu_frame_processor::pop(gpu_frame_ptr& frame){ impl_->pop(frame);}\r
229 gpu_frame_ptr gpu_frame_processor::create_frame(size_t width, size_t height){return impl_->create_frame(width, height);}\r
230 \r
231 }