]> git.sesse.net Git - nageru/blob - nageru/pbo_frame_allocator.cpp
Fix a Clang 19 warning.
[nageru] / nageru / pbo_frame_allocator.cpp
1 #include "pbo_frame_allocator.h"
2
3 #include <bmusb/bmusb.h>
4 #include <assert.h>
5 #include <epoxy/gl.h>
6 #include <movit/util.h>
7 #include <mutex>
8 #include <stdbool.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <cstddef>
12 #include <utility>
13 #include <va/va.h>
14
15 #include "mjpeg_encoder.h"
16 #include "defs.h"
17 #include "shared/va_resource_pool.h"
18 #include "v210_converter.h"
19 #include "shared/va_display.h"
20
21 using namespace std;
22
23 namespace {
24
25 void set_clamp_to_edge()
26 {
27         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
28         check_error();
29         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
30         check_error();
31         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
32         check_error();
33 }
34
35 }  // namespace
36
37 PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
38         : card_index(card_index),
39           mjpeg_encoder(mjpeg_encoder),
40           pixel_format(pixel_format),
41           buffer(buffer),
42           frame_size(frame_size),
43           num_queued_frames(num_queued_frames),
44           width(width),
45           height(height),
46           permissions(permissions),
47           map_bits(map_bits)
48 {
49         userdata.reset(new Userdata[num_queued_frames]);
50         for (size_t i = 0; i < num_queued_frames; ++i) {
51                 Frame frame;
52                 init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation);
53                 freelist.push(frame);
54         }
55         glBindBuffer(buffer, 0);
56         check_error();
57         glBindTexture(GL_TEXTURE_2D, 0);
58         check_error();
59 }
60
61 void PBOFrameAllocator::init_frame(Frame &frame, Userdata *ud, PBOFrameAllocator *owner, bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits, GLenum buffer, int generation)
62 {
63         GLuint pbo;
64         glGenBuffers(1, &pbo);
65         check_error();
66         glBindBuffer(buffer, pbo);
67         check_error();
68         glBufferStorage(buffer, frame_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT);
69         check_error();
70
71         frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
72         frame.data2 = frame.data + frame_size / 2;
73         check_error();
74         frame.size = frame_size;
75         frame.userdata = ud;
76         ud->generation = generation;
77         ud->pbo = pbo;
78         ud->pixel_format = pixel_format;
79         ud->data_copy_malloc = new uint8_t[frame_size];
80         frame.owner = owner;
81
82         // For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
83         // into separate textures. For 10-bit, the input format (v210)
84         // is complicated enough that we need to interpolate up to 4:4:4,
85         // which we do in a compute shader ourselves. For BGRA, the data
86         // is already 4:4:4:4.
87         frame.interleaved = (pixel_format == bmusb::PixelFormat_8BitYCbCr);
88
89         // Create textures. We don't allocate any data for the second field at this point
90         // (just create the texture state with the samplers), since our default assumed
91         // resolution is progressive.
92         switch (pixel_format) {
93         case bmusb::PixelFormat_8BitYCbCr:
94                 glGenTextures(2, ud->tex_y);
95                 check_error();
96                 glGenTextures(2, ud->tex_cbcr);
97                 check_error();
98                 break;
99         case bmusb::PixelFormat_10BitYCbCr:
100                 glGenTextures(2, ud->tex_v210);
101                 check_error();
102                 glGenTextures(2, ud->tex_444);
103                 check_error();
104                 break;
105         case bmusb::PixelFormat_8BitBGRA:
106                 glGenTextures(2, ud->tex_rgba);
107                 check_error();
108                 break;
109         case bmusb::PixelFormat_8BitYCbCrPlanar:
110                 glGenTextures(2, ud->tex_y);
111                 check_error();
112                 glGenTextures(2, ud->tex_cb);
113                 check_error();
114                 glGenTextures(2, ud->tex_cr);
115                 check_error();
116                 break;
117         default:
118                 assert(false);
119         }
120
121         ud->last_width[0] = width;
122         ud->last_height[0] = height;
123         ud->last_cbcr_width[0] = width / 2;
124         ud->last_cbcr_height[0] = height;
125         ud->last_v210_width[0] = 0;
126
127         ud->last_width[1] = 0;
128         ud->last_height[1] = 0;
129         ud->last_cbcr_width[1] = 0;
130         ud->last_cbcr_height[1] = 0;
131         ud->last_v210_width[1] = 0;
132
133         ud->last_interlaced = false;
134         ud->last_has_signal = false;
135         ud->last_is_connected = false;
136         for (unsigned field = 0; field < 2; ++field) {
137                 switch (pixel_format) {
138                 case bmusb::PixelFormat_10BitYCbCr: {
139                         const size_t v210_width = v210Converter::get_minimum_v210_texture_width(width);
140
141                         // Seemingly we need to set the minification filter even though
142                         // shader image loads don't use them, or NVIDIA will just give us
143                         // zero back.
144                         glBindTexture(GL_TEXTURE_2D, ud->tex_v210[field]);
145                         check_error();
146                         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
147                         check_error();
148                         if (field == 0) {
149                                 ud->last_v210_width[0] = v210_width;
150                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, v210_width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
151                                 check_error();
152                         }
153
154                         glBindTexture(GL_TEXTURE_2D, ud->tex_444[field]);
155                         check_error();
156                         set_clamp_to_edge();
157                         if (field == 0) {
158                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
159                                 check_error();
160                         }
161                         break;
162                 }
163                 case bmusb::PixelFormat_8BitYCbCr:
164                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
165                         check_error();
166                         set_clamp_to_edge();
167                         if (field == 0) {
168                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
169                                 check_error();
170                         }
171
172                         glBindTexture(GL_TEXTURE_2D, ud->tex_cbcr[field]);
173                         check_error();
174                         set_clamp_to_edge();
175                         if (field == 0) {
176                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, width / 2, height, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
177                                 check_error();
178                         }
179                         break;
180                 case bmusb::PixelFormat_8BitBGRA:
181                         glBindTexture(GL_TEXTURE_2D, ud->tex_rgba[field]);
182                         check_error();
183                         set_clamp_to_edge();
184                         if (field == 0) {
185                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
186                                 check_error();
187                         }
188                         break;
189                 case bmusb::PixelFormat_8BitYCbCrPlanar:
190                         glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
191                         check_error();
192                         set_clamp_to_edge();
193                         if (field == 0) {
194                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
195                                 check_error();
196                         }
197
198                         glBindTexture(GL_TEXTURE_2D, ud->tex_cb[field]);
199                         check_error();
200                         set_clamp_to_edge();
201                         if (field == 0) {
202                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
203                                 check_error();
204                         }
205
206                         glBindTexture(GL_TEXTURE_2D, ud->tex_cr[field]);
207                         check_error();
208                         set_clamp_to_edge();
209                         if (field == 0) {
210                                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
211                                 check_error();
212                         }
213                         break;
214                 default:
215                         assert(false);
216                 }
217         }
218 }
219
220 PBOFrameAllocator::~PBOFrameAllocator()
221 {
222         while (!freelist.empty()) {
223                 Frame frame = freelist.front();
224                 freelist.pop();
225                 destroy_frame(&frame);
226         }
227 }
228
229 void PBOFrameAllocator::destroy_frame(Frame *frame)
230 {
231         Userdata *ud = (Userdata *)frame->userdata;
232         delete[] ud->data_copy_malloc;
233
234         GLuint pbo = ud->pbo;
235         glBindBuffer(buffer, pbo);
236         check_error();
237         glUnmapBuffer(buffer);
238         check_error();
239         glBindBuffer(buffer, 0);
240         check_error();
241         glDeleteBuffers(1, &pbo);
242         check_error();
243         switch (ud->pixel_format) {
244         case bmusb::PixelFormat_10BitYCbCr:
245                 glDeleteTextures(2, ud->tex_v210);
246                 check_error();
247                 glDeleteTextures(2, ud->tex_444);
248                 check_error();
249                 break;
250         case bmusb::PixelFormat_8BitYCbCr:
251                 glDeleteTextures(2, ud->tex_y);
252                 check_error();
253                 glDeleteTextures(2, ud->tex_cbcr);
254                 check_error();
255                 break;
256         case bmusb::PixelFormat_8BitBGRA:
257                 glDeleteTextures(2, ud->tex_rgba);
258                 check_error();
259                 break;
260         case bmusb::PixelFormat_8BitYCbCrPlanar:
261                 glDeleteTextures(2, ud->tex_y);
262                 check_error();
263                 glDeleteTextures(2, ud->tex_cb);
264                 check_error();
265                 glDeleteTextures(2, ud->tex_cr);
266                 check_error();
267                 break;
268         default:
269                 assert(false);
270         }
271
272         if (ud->generation != generation) {
273                 auto it = lingering_generations.find(ud->generation);
274                 assert(it != lingering_generations.end());
275                 if (--it->second.num_frames_left == 0) {
276                         lingering_generations.erase(it);  // Deallocates the userdata block.
277                 }
278         }
279 }
280 //static int sumsum = 0;
281
282 bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame()
283 {
284         Frame vf;
285
286         lock_guard<mutex> lock(freelist_mutex);  // Meh.
287         if (freelist.empty()) {
288                 printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
289         } else {
290                 //fprintf(stderr, "freelist has %d allocated\n", ++sumsum);
291                 vf = freelist.front();
292                 freelist.pop();  // Meh.
293         }
294         vf.len = 0;
295         vf.overflow = 0;
296
297         if (mjpeg_encoder != nullptr &&
298             mjpeg_encoder->should_encode_mjpeg_for_card(card_index) &&
299             vf.userdata != nullptr) {
300                 Userdata *ud = (Userdata *)vf.userdata;
301                 vf.data_copy = ud->data_copy_malloc;
302                 ud->data_copy_current_src = Userdata::FROM_MALLOC;
303         } else {
304                 vf.data_copy = nullptr;
305         }
306
307         return vf;
308 }
309
310 bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride)
311 {
312         Frame vf;
313
314         size_t desired_frame_bytes = width * stride;
315         if (stride > 8192 * 4 || height > 8192 || desired_frame_bytes > MAX_FRAME_SIZE) {
316                 return vf;
317         }
318
319         {
320                 lock_guard<mutex> lock(freelist_mutex);
321                 if (freelist.empty()) {
322                         printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
323                         vf.len = 0;
324                         vf.overflow = 0;
325                         return vf;
326                 } else {
327                         vf = freelist.front();
328                         freelist.pop();
329                 }
330         }
331
332         Userdata *userdata = (Userdata *)vf.userdata;
333         assert(generation == userdata->generation);
334         if (vf.size < desired_frame_bytes || (vf.size > FRAME_SIZE && vf.size > desired_frame_bytes * 2)) {
335                 // Frame is either too small or way too large, so reallocate it.
336                 // Note that width and height now automatically becomes the right size
337                 // (the one we just asked for, instead of the default for the allocator,
338                 // which is generally the global resolution); it doesn't matter
339                 // for correctness, since we'll recreate the texture on upload if needed,
340                 // but it is nice to save that step.
341                 destroy_frame(&vf);
342                 init_frame(vf, userdata, this, pixel_format, std::max<size_t>(desired_frame_bytes, FRAME_SIZE), width, height, permissions, map_bits, buffer, generation);
343         };
344
345         vf.len = 0;
346         vf.overflow = 0;
347
348         if (mjpeg_encoder != nullptr &&
349             mjpeg_encoder->should_encode_mjpeg_for_card(card_index)) {
350                 if (mjpeg_encoder->using_vaapi()) {
351                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
352                         VAResourcePool::VAResources resources = mjpeg_encoder->get_va_pool()->get_va_resources(width, height, VA_FOURCC_UYVY);  // Only used by DeckLinkCapture, so always 4:2:2.
353                         ReleaseVAResources release(mjpeg_encoder->get_va_pool(), resources);
354
355                         if (resources.image.pitches[0] == stride) {
356                                 userdata->va_resources = move(resources);
357                                 userdata->va_resources_release = move(release);
358
359                                 VAStatus va_status = vaMapBuffer(va_dpy, resources.image.buf, (void **)&vf.data_copy);
360                                 CHECK_VASTATUS(va_status, "vaMapBuffer");
361                                 vf.data_copy += resources.image.offsets[0];
362                                 userdata->data_copy_current_src = Userdata::FROM_VA_API;
363                         } else {
364                                 printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, resources.image.pitches[0]);
365                                 vf.data_copy = userdata->data_copy_malloc;
366                                 userdata->data_copy_current_src = Userdata::FROM_MALLOC;
367                         }
368                 } else {
369                         vf.data_copy = userdata->data_copy_malloc;
370                         userdata->data_copy_current_src = Userdata::FROM_MALLOC;
371                 }
372         } else {
373                 vf.data_copy = nullptr;
374         }
375
376         return vf;
377 }
378
379 void PBOFrameAllocator::release_frame(Frame frame)
380 {
381         if (frame.overflow > 0) {
382                 printf("%d bytes overflow after last (PBO) frame\n", int(frame.overflow));
383         }
384
385 #if 0
386         // Poison the page. (Note that this might be bogus if you don't have an OpenGL context.)
387         memset(frame.data, 0, frame.size);
388         Userdata *userdata = (Userdata *)frame.userdata;
389         for (unsigned field = 0; field < 2; ++field) {
390                 glBindTexture(GL_TEXTURE_2D, userdata->tex_y[field]);
391                 check_error();
392                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
393                 check_error();
394                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
395                 check_error();
396                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
397                 check_error();
398                 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, userdata->last_width[field], userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
399                 check_error();
400
401                 glBindTexture(GL_TEXTURE_2D, userdata->tex_cbcr[field]);
402                 check_error();
403                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
404                 check_error();
405                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
406                 check_error();
407                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
408                 check_error();
409                 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, userdata->last_width[field] / 2, userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
410                 check_error();
411         }
412 #endif
413
414         {
415                 // In case we never got to upload the frame to MJPEGEncoder.
416                 Userdata *userdata = (Userdata *)frame.userdata;
417                 VAResourcePool::VAResources resources __attribute__((unused)) = move(userdata->va_resources);
418                 ReleaseVAResources release = move(userdata->va_resources_release);
419
420                 if (frame.data_copy != nullptr && userdata->data_copy_current_src == Userdata::FROM_VA_API) {
421                         VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
422                         VAStatus va_status = vaUnmapBuffer(va_dpy, resources.image.buf);
423                         CHECK_VASTATUS(va_status, "vaUnmapBuffer");
424
425                         frame.data_copy = nullptr;
426                 }
427         }
428
429         lock_guard<mutex> lock(freelist_mutex);
430         Userdata *userdata = (Userdata *)frame.userdata;
431         if (userdata->generation == generation) {
432                 freelist.push(frame);
433         } else {
434                 destroy_frame(&frame);
435         }
436         //--sumsum;
437 }
438
439 void PBOFrameAllocator::reconfigure(bmusb::PixelFormat pixel_format,
440                          size_t frame_size,
441                          GLuint width, GLuint height,
442                          unsigned card_index,
443                          MJPEGEncoder *mjpeg_encoder,
444                          size_t num_queued_frames,
445                          GLenum buffer,
446                          GLenum permissions,
447                          GLenum map_bits)
448 {
449         if (pixel_format == this->pixel_format &&
450             frame_size == this->frame_size &&
451             width == this->width && height == this->height &&
452             card_index == this->card_index &&
453             mjpeg_encoder == this->mjpeg_encoder &&
454             num_queued_frames == this->num_queued_frames &&
455             buffer == this->buffer &&
456             permissions == this->permissions &&
457             map_bits == this->map_bits) {
458                 return;
459         }
460
461         lock_guard<mutex> lock(freelist_mutex);
462         lingering_generations[generation] = LingeringGeneration{ move(userdata), this->num_queued_frames };
463         ++generation;
464
465         while (!freelist.empty()) {
466                 Frame frame = freelist.front();
467                 freelist.pop();
468                 destroy_frame(&frame);
469         }
470
471         this->pixel_format = pixel_format;
472         this->frame_size = frame_size;
473         this->width = width;
474         this->height = height;
475         this->card_index = card_index;
476         this->mjpeg_encoder = mjpeg_encoder;
477         this->num_queued_frames = num_queued_frames;
478         this->buffer = buffer;
479         this->permissions = permissions;
480         this->map_bits = map_bits;
481
482         userdata.reset(new Userdata[num_queued_frames]);
483         for (size_t i = 0; i < num_queued_frames; ++i) {
484                 Frame frame;
485                 init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation);
486                 freelist.push(frame);
487         }
488
489         // There may still be frames out with the old configuration
490         // (for instance, living in GLWidget); they will be destroyed
491         // when they come back in release_frame().
492 }