]> git.sesse.net Git - nageru/blob - nageru/pbo_frame_allocator.cpp
1000a727326604c2d30a2f7567b296f0af096833
[nageru] / nageru / pbo_frame_allocator.cpp
1 #include "pbo_frame_allocator.h"
2
3 #include <bmusb/bmusb.h>
4 #include <movit/util.h>
5 #include <stdbool.h>
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <cstddef>
9
10 #include "flags.h"
11 #include "mjpeg_encoder.h"
12 #include "v210_converter.h"
13 #include "va_display_with_cleanup.h"
14
15 using namespace std;
16
17 namespace {
18
19 void set_clamp_to_edge()
20 {
21         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
22         check_error();
23         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
24         check_error();
25         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
26         check_error();
27 }
28
29 }  // namespace
30
31 PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
32         : card_index(card_index),
33           mjpeg_encoder(mjpeg_encoder),
34           pixel_format(pixel_format),
35           buffer(buffer),
36           frame_size(frame_size),
37           num_queued_frames(num_queued_frames),
38           width(width),
39           height(height),
40           permissions(permissions),
41           map_bits(map_bits)
42 {
43         userdata.reset(new Userdata[num_queued_frames]);
44         for (size_t i = 0; i < num_queued_frames; ++i) {
45                 init_frame(i, frame_size, width, height, permissions, map_bits, generation);
46         }
47         glBindBuffer(buffer, 0);
48         check_error();
49         glBindTexture(GL_TEXTURE_2D, 0);
50         check_error();
51 }
52
53 void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits, int generation)
54 {
55         GLuint pbo;
56         glGenBuffers(1, &pbo);
57         check_error();
58         glBindBuffer(buffer, pbo);
59         check_error();
60         glBufferStorage(buffer, frame_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT);
61         check_error();
62
63         Frame frame;
64         frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
65         frame.data2 = frame.data + frame_size / 2;
66         check_error();
67         frame.size = frame_size;
68         Userdata *ud = &userdata[frame_idx];
69         frame.userdata = ud;
70         ud->generation = generation;
71         ud->pbo = pbo;
72         ud->pixel_format = pixel_format;
73         ud->data_copy_malloc = new uint8_t[frame_size];
74         frame.owner = this;
75
76         // For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
77         // into separate textures. For 10-bit, the input format (v210)
78         // is complicated enough that we need to interpolate up to 4:4:4,
79         // which we do in a compute shader ourselves. For BGRA, the data
80         // is already 4:4:4:4.
81         frame.interleaved = (pixel_format == bmusb::PixelFormat_8BitYCbCr);
82
83         // Create textures. We don't allocate any data for the second field at this point
84         // (just create the texture state with the samplers), since our default assumed
85         // resolution is progressive.
86         switch (pixel_format) {
87         case bmusb::PixelFormat_8BitYCbCr:
88                 glGenTextures(2, ud->tex_y);
89                 check_error();
90                 glGenTextures(2, ud->tex_cbcr);
91                 check_error();
92                 break;
93         case bmusb::PixelFormat_10BitYCbCr:
94                 glGenTextures(2, ud->tex_v210);
95                 check_error();
96                 glGenTextures(2, ud->tex_444);
97                 check_error();
98                 break;
99         case bmusb::PixelFormat_8BitBGRA:
100                 glGenTextures(2, ud->tex_rgba);
101                 check_error();
102                 break;
103         case bmusb::PixelFormat_8BitYCbCrPlanar:
104                 glGenTextures(2, ud->tex_y);
105                 check_error();
106                 glGenTextures(2, ud->tex_cb);
107                 check_error();
108                 glGenTextures(2, ud->tex_cr);
109                 check_error();
110                 break;
111         default:
112                 assert(false);
113         }
114
115         ud->last_width[0] = width;
116         ud->last_height[0] = height;
117         ud->last_cbcr_width[0] = width / 2;
118         ud->last_cbcr_height[0] = height;
119         ud->last_v210_width[0] = 0;
120
121         ud->last_width[1] = 0;
122         ud->last_height[1] = 0;
123         ud->last_cbcr_width[1] = 0;
124         ud->last_cbcr_height[1] = 0;
125         ud->last_v210_width[1] = 0;
126
127         ud->last_interlaced = false;
128         ud->last_has_signal = false;
129         ud->last_is_connected = false;
130         for (unsigned field = 0; field < 2; ++field) {
131                 switch (pixel_format) {
132                 case bmusb::PixelFormat_10BitYCbCr: {
133                         const size_t v210_width = v210Converter::get_minimum_v210_texture_width(width);
134
135                         // Seemingly we need to set the minification filter even though
136                         // shader image loads don't use them, or NVIDIA will just give us
137                         // zero back.
138                         glBindTexture(GL_TEXTURE_2D, ud->tex_v210[field]);
139                         check_error();
140                         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
141                         check_error();
142                         if (field == 0) {
143                                 ud->last_v210_width[0] = v210_width;
144                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, v210_width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
145                                 check_error();
146                         }
147
148                         glBindTexture(GL_TEXTURE_2D, ud->tex_444[field]);
149                         check_error();
150                         set_clamp_to_edge();
151                         if (field == 0) {
152                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
153                                 check_error();
154                         }
155                         break;
156                 }
157                 case bmusb::PixelFormat_8BitYCbCr:
158                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
159                         check_error();
160                         set_clamp_to_edge();
161                         if (field == 0) {
162                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
163                                 check_error();
164                         }
165
166                         glBindTexture(GL_TEXTURE_2D, ud->tex_cbcr[field]);
167                         check_error();
168                         set_clamp_to_edge();
169                         if (field == 0) {
170                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, width / 2, height, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
171                                 check_error();
172                         }
173                         break;
174                 case bmusb::PixelFormat_8BitBGRA:
175                         glBindTexture(GL_TEXTURE_2D, ud->tex_rgba[field]);
176                         check_error();
177                         set_clamp_to_edge();
178                         if (field == 0) {
179                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
180                                 check_error();
181                         }
182                         break;
183                 case bmusb::PixelFormat_8BitYCbCrPlanar:
184                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
185                         check_error();
186                         set_clamp_to_edge();
187                         if (field == 0) {
188                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
189                                 check_error();
190                         }
191
192                         glBindTexture(GL_TEXTURE_2D, ud->tex_cb[field]);
193                         check_error();
194                         set_clamp_to_edge();
195                         if (field == 0) {
196                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
197                                 check_error();
198                         }
199
200                         glBindTexture(GL_TEXTURE_2D, ud->tex_cr[field]);
201                         check_error();
202                         set_clamp_to_edge();
203                         if (field == 0) {
204                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
205                                 check_error();
206                         }
207                         break;
208                 default:
209                         assert(false);
210                 }
211         }
212
213         freelist.push(frame);
214 }
215
216 PBOFrameAllocator::~PBOFrameAllocator()
217 {
218         while (!freelist.empty()) {
219                 Frame frame = freelist.front();
220                 freelist.pop();
221                 destroy_frame(&frame);
222         }
223 }
224
225 void PBOFrameAllocator::destroy_frame(Frame *frame)
226 {
227         Userdata *ud = (Userdata *)frame->userdata;
228         delete[] ud->data_copy_malloc;
229
230         GLuint pbo = ud->pbo;
231         glBindBuffer(buffer, pbo);
232         check_error();
233         glUnmapBuffer(buffer);
234         check_error();
235         glBindBuffer(buffer, 0);
236         check_error();
237         glDeleteBuffers(1, &pbo);
238         check_error();
239         switch (pixel_format) {
240         case bmusb::PixelFormat_10BitYCbCr:
241                 glDeleteTextures(2, ud->tex_v210);
242                 check_error();
243                 glDeleteTextures(2, ud->tex_444);
244                 check_error();
245                 break;
246         case bmusb::PixelFormat_8BitYCbCr:
247                 glDeleteTextures(2, ud->tex_y);
248                 check_error();
249                 glDeleteTextures(2, ud->tex_cbcr);
250                 check_error();
251                 break;
252         case bmusb::PixelFormat_8BitBGRA:
253                 glDeleteTextures(2, ud->tex_rgba);
254                 check_error();
255                 break;
256         case bmusb::PixelFormat_8BitYCbCrPlanar:
257                 glDeleteTextures(2, ud->tex_y);
258                 check_error();
259                 glDeleteTextures(2, ud->tex_cb);
260                 check_error();
261                 glDeleteTextures(2, ud->tex_cr);
262                 check_error();
263                 break;
264         default:
265                 assert(false);
266         }
267
268         if (ud->generation != generation) {
269                 auto it = lingering_generations.find(ud->generation);
270                 assert(it != lingering_generations.end());
271                 if (--it->second.num_frames_left == 0) {
272                         lingering_generations.erase(it);  // Deallocates the userdata block.
273                 }
274         }
275 }
276 //static int sumsum = 0;
277
278 bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame()
279 {
280         Frame vf;
281
282         lock_guard<mutex> lock(freelist_mutex);  // Meh.
283         if (freelist.empty()) {
284                 printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
285         } else {
286                 //fprintf(stderr, "freelist has %d allocated\n", ++sumsum);
287                 vf = freelist.front();
288                 freelist.pop();  // Meh.
289         }
290         vf.len = 0;
291         vf.overflow = 0;
292
293         if (mjpeg_encoder != nullptr &&
294             mjpeg_encoder->should_encode_mjpeg_for_card(card_index) &&
295             vf.userdata != nullptr) {
296                 Userdata *ud = (Userdata *)vf.userdata;
297                 vf.data_copy = ud->data_copy_malloc;
298                 ud->data_copy_current_src = Userdata::FROM_MALLOC;
299         } else {
300                 vf.data_copy = nullptr;
301         }
302
303         return vf;
304 }
305
306 bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride)
307 {
308         Frame vf;
309
310         {
311                 lock_guard<mutex> lock(freelist_mutex);
312                 if (freelist.empty()) {
313                         printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
314                         vf.len = 0;
315                         vf.overflow = 0;
316                         return vf;
317                 } else {
318                         vf = freelist.front();
319                         freelist.pop();
320                 }
321         }
322         vf.len = 0;
323         vf.overflow = 0;
324
325         Userdata *userdata = (Userdata *)vf.userdata;
326
327         if (mjpeg_encoder != nullptr &&
328             mjpeg_encoder->should_encode_mjpeg_for_card(card_index)) {
329                 if (mjpeg_encoder->using_vaapi()) {
330                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
331                         MJPEGEncoder::VAResources resources = mjpeg_encoder->get_va_resources(width, height);
332                         MJPEGEncoder::ReleaseVAResources release(mjpeg_encoder, resources);
333
334                         if (resources.image.pitches[0] == stride) {
335                                 userdata->va_resources = move(resources);
336                                 userdata->va_resources_release = move(release);
337
338                                 VAStatus va_status = vaMapBuffer(va_dpy, resources.image.buf, (void **)&vf.data_copy);
339                                 CHECK_VASTATUS(va_status, "vaMapBuffer");
340                                 vf.data_copy += resources.image.offsets[0];
341                                 userdata->data_copy_current_src = Userdata::FROM_VA_API;
342                         } else {
343                                 printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, resources.image.pitches[0]);
344                                 vf.data_copy = userdata->data_copy_malloc;
345                                 userdata->data_copy_current_src = Userdata::FROM_MALLOC;
346                         }
347                 } else {
348                         vf.data_copy = userdata->data_copy_malloc;
349                         userdata->data_copy_current_src = Userdata::FROM_MALLOC;
350                 }
351         } else {
352                 vf.data_copy = nullptr;
353         }
354
355         return vf;
356 }
357
358 void PBOFrameAllocator::release_frame(Frame frame)
359 {
360         if (frame.overflow > 0) {
361                 printf("%d bytes overflow after last (PBO) frame\n", int(frame.overflow));
362         }
363
364 #if 0
365         // Poison the page. (Note that this might be bogus if you don't have an OpenGL context.)
366         memset(frame.data, 0, frame.size);
367         Userdata *userdata = (Userdata *)frame.userdata;
368         for (unsigned field = 0; field < 2; ++field) {
369                 glBindTexture(GL_TEXTURE_2D, userdata->tex_y[field]);
370                 check_error();
371                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
372                 check_error();
373                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
374                 check_error();
375                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
376                 check_error();
377                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, userdata->last_width[field], userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
378                 check_error();
379
380                 glBindTexture(GL_TEXTURE_2D, userdata->tex_cbcr[field]);
381                 check_error();
382                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
383                 check_error();
384                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
385                 check_error();
386                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
387                 check_error();
388                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, userdata->last_width[field] / 2, userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
389                 check_error();
390         }
391 #endif
392
393         {
394                 // In case we never got to upload the frame to MJPEGEncoder.
395                 Userdata *userdata = (Userdata *)frame.userdata;
396                 MJPEGEncoder::VAResources resources __attribute__((unused)) = move(userdata->va_resources);
397                 MJPEGEncoder::ReleaseVAResources release = move(userdata->va_resources_release);
398
399                 if (frame.data_copy != nullptr && userdata->data_copy_current_src == Userdata::FROM_VA_API) {
400                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
401                         VAStatus va_status = vaUnmapBuffer(va_dpy, resources.image.buf);
402                         CHECK_VASTATUS(va_status, "vaUnmapBuffer");
403
404                         frame.data_copy = nullptr;
405                 }
406         }
407
408         lock_guard<mutex> lock(freelist_mutex);
409         Userdata *userdata = (Userdata *)frame.userdata;
410         if (userdata->generation == generation) {
411                 freelist.push(frame);
412         } else {
413                 destroy_frame(&frame);
414         }
415         //--sumsum;
416 }
417
418 void PBOFrameAllocator::reconfigure(bmusb::PixelFormat pixel_format,
419                          size_t frame_size,
420                          GLuint width, GLuint height,
421                          unsigned card_index,
422                          MJPEGEncoder *mjpeg_encoder,
423                          size_t num_queued_frames,
424                          GLenum buffer,
425                          GLenum permissions,
426                          GLenum map_bits)
427 {
428         if (pixel_format == this->pixel_format &&
429             frame_size == this->frame_size &&
430             width == this->width && height == this->height &&
431             card_index == this->card_index &&
432             mjpeg_encoder == this->mjpeg_encoder &&
433             num_queued_frames == this->num_queued_frames &&
434             buffer == this->buffer &&
435             permissions == this->permissions &&
436             map_bits == this->map_bits) {
437                 return;
438         }
439
440         lock_guard<mutex> lock(freelist_mutex);
441         lingering_generations[generation] = LingeringGeneration{ move(userdata), this->num_queued_frames };
442         ++generation;
443
444         while (!freelist.empty()) {
445                 Frame frame = freelist.front();
446                 freelist.pop();
447                 destroy_frame(&frame);
448         }
449
450         this->pixel_format = pixel_format;
451         this->frame_size = frame_size;
452         this->width = width;
453         this->height = height;
454         this->card_index = card_index;
455         this->mjpeg_encoder = mjpeg_encoder;
456         this->num_queued_frames = num_queued_frames;
457         this->buffer = buffer;
458         this->permissions = permissions;
459         this->map_bits = map_bits;
460
461         userdata.reset(new Userdata[num_queued_frames]);
462         for (size_t i = 0; i < num_queued_frames; ++i) {
463                 init_frame(i, frame_size, width, height, permissions, map_bits, generation);
464         }
465
466         // There may still be frames out with the old configuration
467         // (for instance, living in GLWidget); they will be destroyed
468         // when they come back in release_frame().
469 }