]> git.sesse.net Git - casparcg/blob - core/frame/gpu_frame_processor.cpp
9162d4f919e3511cf9d2bdf2e4534b54fa08440a
[casparcg] / core / frame / gpu_frame_processor.cpp
1 #include "../StdAfx.h"\r
2 \r
3 #include "gpu_frame_processor.h"\r
4 \r
5 #include "gpu_frame.h"\r
6 #include "frame_format.h"\r
7 \r
8 #include "../../common/exception/exceptions.h"\r
9 #include "../../common/concurrency/executor.h"\r
10 #include "../../common/image/image.h"\r
11 #include "../../common/gl/utility.h"\r
12 \r
13 #include <Glee.h>\r
14 #include <SFML/Window.hpp>\r
15 \r
16 #include <tbb/concurrent_queue.h>\r
17 #include <tbb/concurrent_unordered_map.h>\r
18 \r
19 #include <boost/lexical_cast.hpp>\r
20 #include <boost/thread/once.hpp>\r
21 #include <boost/thread.hpp>\r
22 #include <boost/range.hpp>\r
23 #include <boost/foreach.hpp>\r
24 #include <boost/range/algorithm_ext/erase.hpp>\r
25 #include <boost/range/algorithm.hpp>\r
26 \r
27 #include <functional>\r
28 #include <unordered_map>\r
29 #include <numeric>\r
30 \r
31 namespace caspar {\r
32         \r
33 class frame_buffer\r
34 {\r
35 public:\r
36         frame_buffer(size_t width, size_t height)\r
37         {\r
38                 CASPAR_GL_CHECK(glGenTextures(1, &texture_));   \r
39 \r
40                 CASPAR_GL_CHECK(glBindTexture(GL_TEXTURE_2D, texture_));\r
41 \r
42                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR));\r
43                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR));\r
44                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));\r
45                 CASPAR_GL_CHECK(glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));\r
46 \r
47                 CASPAR_GL_CHECK(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL));\r
48 \r
49                 glGenFramebuffersEXT(1, &fbo_);\r
50                 \r
51                 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo_);\r
52                 glBindTexture(GL_TEXTURE_2D, texture_);\r
53                 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, texture_, 0);\r
54         }\r
55 \r
56         ~frame_buffer()\r
57         {\r
58                 glDeleteFramebuffersEXT(1, &fbo_);\r
59         }\r
60                 \r
61         GLuint handle() { return fbo_; }\r
62         GLenum attachement() { return GL_COLOR_ATTACHMENT0_EXT; }\r
63         \r
64 private:\r
65         GLuint texture_;\r
66         GLuint fbo_;\r
67 };\r
68 typedef std::shared_ptr<frame_buffer> frame_buffer_ptr;\r
69 \r
70 struct gpu_frame_processor::implementation\r
71 {       \r
72         implementation(const frame_format_desc& format_desc) : format_desc_(format_desc)\r
73         {               \r
74                 executor_.start();\r
75                 executor_.begin_invoke([=]\r
76                 {\r
77                         context_.reset(new sf::Context());\r
78                         context_->SetActive(true);\r
79                         glEnable(GL_TEXTURE_2D);\r
80                         glEnable(GL_BLEND);\r
81                         glDisable(GL_DEPTH_TEST);\r
82                         glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);                      \r
83                         glClearColor(0.0, 0.0, 0.0, 0.0);\r
84                         glViewport(0, 0, format_desc_.width, format_desc_.height);\r
85                         glLoadIdentity();\r
86 \r
87                         input_.resize(2);\r
88                         writing_.resize(2);\r
89                         fbo_ = std::make_shared<frame_buffer>(format_desc_.width, format_desc_.height);\r
90                         output_frame_ = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
91                         index_ = 0;\r
92                 });\r
93 \r
94                 empty_frame_ = create_frame(format_desc.width, format_desc.height);\r
95                 common::image::clear(empty_frame_->data(), empty_frame_->size());\r
96                 // Fill pipeline length\r
97                 for(int n = 0; n < 3; ++n)\r
98                         finished_frames_.push(empty_frame_);\r
99         }\r
100 \r
101         ~implementation()\r
102         {\r
103                 finished_frames_.push(nullptr);\r
104                 executor_.stop();\r
105         }\r
106                 \r
107         void pop(gpu_frame_ptr& frame)\r
108         {\r
109                 finished_frames_.pop(frame);\r
110         }       \r
111         \r
112         void composite(std::vector<gpu_frame_ptr> frames)\r
113         {\r
114                 boost::range::remove_erase(frames, nullptr);\r
115                 boost::range::remove_erase(frames, gpu_frame::null());\r
116 \r
117                 executor_.begin_invoke([=]\r
118                 {\r
119                         try\r
120                         {\r
121                                 index_ = (index_ + 1) % 2;\r
122                                 int next_index = (index_ + 1) % 2;\r
123 \r
124                                 // 2. Start asynchronous DMA transfer to video memory\r
125                                 // Lock frames and give pointer ownership to OpenGL                     \r
126                                 boost::range::for_each(input_[index_], std::mem_fn(&gpu_frame::write_lock));\r
127                                 writing_[index_] = input_[index_];      \r
128                                 input_[index_].clear();\r
129                                 \r
130                                 // 1. Copy to page-locked memory\r
131                                 input_[next_index] = frames;\r
132                                                                 \r
133                                 // 4. Output to external buffer\r
134                                 if(output_frame_->read_unlock())\r
135                                         finished_frames_.push(output_frame_);\r
136                                 output_frame_ = nullptr;\r
137                 \r
138                                 // 3. Draw to framebuffer and start asynchronous DMA transfer to page-locked memory                             \r
139                                 // Clear framebuffer\r
140                                 glClear(GL_COLOR_BUFFER_BIT);   \r
141 \r
142                                 // Draw all frames to framebuffer\r
143                                 glLoadIdentity();\r
144                                 boost::range::for_each(writing_[next_index], std::mem_fn(&gpu_frame::draw));\r
145                                 \r
146                                 // Create an output frame\r
147                                 output_frame_ = create_output_frame();\r
148                         \r
149                                 // Read from framebuffer into page-locked memory\r
150                                 output_frame_->read_lock(GL_COLOR_ATTACHMENT0_EXT);\r
151 \r
152                                 // Unlock frames and give back pointer ownership\r
153                                 boost::range::for_each(writing_[next_index], std::mem_fn(&gpu_frame::write_unlock));\r
154                                 \r
155                                 // Mix audio from composite frames into output frame\r
156                                 std::accumulate(writing_[next_index].begin(), writing_[next_index].end(), output_frame_, mix_audio_safe<gpu_frame_ptr>);        \r
157 \r
158                                 // Return frames to pool\r
159                                 writing_[next_index].clear();\r
160                         }\r
161                         catch(...)\r
162                         {\r
163                                 CASPAR_LOG_CURRENT_EXCEPTION();\r
164                         }\r
165                 });     \r
166         }\r
167 \r
168         gpu_frame_ptr create_output_frame()\r
169         {       \r
170                 gpu_frame_ptr frame;\r
171                 if(!out_frame_pool_.try_pop(frame))                             \r
172                         frame = std::make_shared<gpu_frame>(format_desc_.width, format_desc_.height);\r
173 \r
174                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
175                 {\r
176                         frame->reset();\r
177                         out_frame_pool_.push(frame);\r
178                 });\r
179         }\r
180                 \r
181         gpu_frame_ptr create_frame(size_t width, size_t height)\r
182         {\r
183                 size_t key = width | (height << 16);\r
184                 auto& pool = input_frame_pools_[key];\r
185                 \r
186                 gpu_frame_ptr frame;\r
187                 if(!pool.try_pop(frame))\r
188                 {\r
189                         frame = executor_.invoke([=]() -> gpu_frame_ptr\r
190                         {\r
191                                 auto frame = std::make_shared<gpu_frame>(width, height);\r
192                                 frame->write_unlock();\r
193                                 return frame;\r
194                         });\r
195                 }\r
196                 \r
197                 auto destructor = [=]\r
198                 {\r
199                         frame->reset();\r
200                         input_frame_pools_[key].push(frame);\r
201                 };\r
202 \r
203                 return gpu_frame_ptr(frame.get(), [=](gpu_frame*)\r
204                 {\r
205                         executor_.begin_invoke(destructor);\r
206                 });\r
207         }\r
208                         \r
209         tbb::concurrent_unordered_map<size_t, tbb::concurrent_bounded_queue<gpu_frame_ptr>> input_frame_pools_;\r
210 \r
211         tbb::concurrent_bounded_queue<gpu_frame_ptr> out_frame_pool_;\r
212                 \r
213         frame_buffer_ptr fbo_;\r
214 \r
215         int index_;\r
216         std::vector<std::vector<gpu_frame_ptr>>                 input_;\r
217         std::vector<std::vector<gpu_frame_ptr>>                 writing_;\r
218 \r
219         gpu_frame_ptr                                                                   output_frame_;\r
220         tbb::concurrent_bounded_queue<gpu_frame_ptr>    finished_frames_;\r
221                         \r
222         frame_format_desc format_desc_;\r
223         \r
224         std::unique_ptr<sf::Context> context_;\r
225         common::executor executor_;\r
226 \r
227         gpu_frame_ptr empty_frame_;\r
228 };\r
229         \r
230 gpu_frame_processor::gpu_frame_processor(const frame_format_desc& format_desc) : impl_(new implementation(format_desc)){}\r
231 void gpu_frame_processor::push(const std::vector<gpu_frame_ptr>& frames){ impl_->composite(frames);}\r
232 void gpu_frame_processor::pop(gpu_frame_ptr& frame){ impl_->pop(frame);}\r
233 gpu_frame_ptr gpu_frame_processor::create_frame(size_t width, size_t height){return impl_->create_frame(width, height);}\r
234 \r
235 }