1 #include "pbo_frame_allocator.h"
3 #include <bmusb/bmusb.h>
6 #include <movit/util.h>
15 #include "mjpeg_encoder.h"
17 #include "shared/va_resource_pool.h"
18 #include "v210_converter.h"
19 #include "shared/va_display.h"
// Sets sampling state on the GL_TEXTURE_2D target: linear minification
// filtering, and clamp-to-edge wrapping along both S and T. The caller is
// expected to have bound the texture to configure beforehand.
25 void set_clamp_to_edge()
27 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
29 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
31 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Constructs the allocator: records the given configuration in the members
// and initializes num_queued_frames frames through init_frame(), pairing
// frame i with userdata[i]. The buffer target and GL_TEXTURE_2D are left
// unbound afterwards.
37 PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits)
38 : card_index(card_index),
39 mjpeg_encoder(mjpeg_encoder),
40 pixel_format(pixel_format),
42 frame_size(frame_size),
43 num_queued_frames(num_queued_frames),
46 permissions(permissions),
// One Userdata slot per queued frame, owned by the allocator.
49 userdata.reset(new Userdata[num_queued_frames]);
50 for (size_t i = 0; i < num_queued_frames; ++i) {
52 init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation);
// init_frame() binds a buffer and textures; reset both bindings to 0 here.
55 glBindBuffer(buffer, 0);
57 glBindTexture(GL_TEXTURE_2D, 0);
// Sets up the GL and host-side resources backing one frame.
//
//  - Creates a buffer object of frame_size bytes on the given buffer target
//    and maps it persistently; frame.data points at the mapping and
//    frame.data2 at its midpoint.
//  - Allocates a frame_size-byte heap buffer (ud->data_copy_malloc).
//  - Generates two texture names (one per field) for every plane the pixel
//    format needs, and specifies their storage from width/height.
//  - Resets the cached "last seen" dimensions and signal state in ud.
//
// Parameters:
//   frame         Frame to fill in (data, data2, size, interleaved, ...).
//   ud            Userdata slot holding the per-frame GL state.
//   owner         Allocator the frame belongs to.
//   pixel_format  Selects which textures are created.
//   frame_size    Size in bytes of the buffer object and of data_copy_malloc.
//   width, height Initial texture dimensions.
//   permissions   Storage/mapping access flags (GL_MAP_PERSISTENT_BIT is added).
//   map_bits      Additional flags passed to glMapBufferRange only.
//   buffer        Buffer binding target.
//   generation    Generation number stamped into ud.
61 void PBOFrameAllocator::init_frame(Frame &frame, Userdata *ud, PBOFrameAllocator *owner, bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits, GLenum buffer, int generation)
64 glGenBuffers(1, &pbo);
66 glBindBuffer(buffer, pbo);
// Immutable storage with no initial contents; persistent so the mapping
// below can stay valid while the buffer is in use by GL.
68 glBufferStorage(buffer, frame_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT);
71 frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
// Second data pointer starts halfway into the same mapping.
72 frame.data2 = frame.data + frame_size / 2;
74 frame.size = frame_size;
76 ud->generation = generation;
78 ud->pixel_format = pixel_format;
// Heap-side copy buffer; released with delete[] in destroy_frame().
79 ud->data_copy_malloc = new uint8_t[frame_size];
82 // For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr
83 // into separate textures. For 10-bit, the input format (v210)
84 // is complicated enough that we need to interpolate up to 4:4:4,
85 // which we do in a compute shader ourselves. For BGRA, the data
86 // is already 4:4:4:4.
87 frame.interleaved = (pixel_format == bmusb::PixelFormat_8BitYCbCr);
89 // Create textures. We don't allocate any data for the second field at this point
90 // (just create the texture state with the samplers), since our default assumed
91 // resolution is progressive.
92 switch (pixel_format) {
93 case bmusb::PixelFormat_8BitYCbCr:
94 glGenTextures(2, ud->tex_y);
96 glGenTextures(2, ud->tex_cbcr);
99 case bmusb::PixelFormat_10BitYCbCr:
100 glGenTextures(2, ud->tex_v210);
102 glGenTextures(2, ud->tex_444);
105 case bmusb::PixelFormat_8BitBGRA:
106 glGenTextures(2, ud->tex_rgba);
109 case bmusb::PixelFormat_8BitYCbCrPlanar:
110 glGenTextures(2, ud->tex_y);
112 glGenTextures(2, ud->tex_cb);
114 glGenTextures(2, ud->tex_cr);
// Cached dimensions: field 0 starts at the requested size (chroma at half
// width, full height), field 1 starts out as zero-sized.
121 ud->last_width[0] = width;
122 ud->last_height[0] = height;
123 ud->last_cbcr_width[0] = width / 2;
124 ud->last_cbcr_height[0] = height;
125 ud->last_v210_width[0] = 0;
127 ud->last_width[1] = 0;
128 ud->last_height[1] = 0;
129 ud->last_cbcr_width[1] = 0;
130 ud->last_cbcr_height[1] = 0;
131 ud->last_v210_width[1] = 0;
133 ud->last_interlaced = false;
134 ud->last_has_signal = false;
135 ud->last_is_connected = false;
// Per field: bind each texture of the format and specify its storage.
// Passing nullptr as pixel data defines the image without uploading anything.
136 for (unsigned field = 0; field < 2; ++field) {
137 switch (pixel_format) {
138 case bmusb::PixelFormat_10BitYCbCr: {
// Raw v210 texture width as computed by v210Converter for this image width.
139 const size_t v210_width = v210Converter::get_minimum_v210_texture_width(width);
141 // Seemingly we need to set the minification filter even though
142 // shader image loads don't use them, or NVIDIA will just give us
144 glBindTexture(GL_TEXTURE_2D, ud->tex_v210[field]);
146 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
149 ud->last_v210_width[0] = v210_width;
150 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, v210_width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
// Full-width 10-bit texture (tex_444) alongside the raw v210 one.
154 glBindTexture(GL_TEXTURE_2D, ud->tex_444[field]);
158 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
// Interleaved 8-bit: one-channel luma plus a half-width two-channel chroma texture.
163 case bmusb::PixelFormat_8BitYCbCr:
164 glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
168 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
172 glBindTexture(GL_TEXTURE_2D, ud->tex_cbcr[field]);
176 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, width / 2, height, 0, GL_RG, GL_UNSIGNED_BYTE, nullptr);
// BGRA source data stored in an sRGB internal format.
180 case bmusb::PixelFormat_8BitBGRA:
181 glBindTexture(GL_TEXTURE_2D, ud->tex_rgba[field]);
185 glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
// Planar 8-bit: three one-channel textures, chroma planes at half width.
189 case bmusb::PixelFormat_8BitYCbCrPlanar:
190 glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]);
194 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
198 glBindTexture(GL_TEXTURE_2D, ud->tex_cb[field]);
202 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
206 glBindTexture(GL_TEXTURE_2D, ud->tex_cr[field]);
210 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width / 2, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
// Tears down the allocator by destroying the frames that are currently
// sitting on the freelist.
220 PBOFrameAllocator::~PBOFrameAllocator()
222 while (!freelist.empty()) {
// Work on a copy of the front entry; destroy_frame() takes a pointer.
223 Frame frame = freelist.front();
225 destroy_frame(&frame);
// Releases everything init_frame() created for a frame: the heap copy
// buffer, the mapped buffer object, and the textures belonging to the
// frame's pixel format. If the frame comes from an older generation than the
// allocator's current one, that generation's outstanding-frame count is
// decremented, and its bookkeeping entry is erased once it reaches zero.
229 void PBOFrameAllocator::destroy_frame(Frame *frame)
231 Userdata *ud = (Userdata *)frame->userdata;
// Matches the new uint8_t[] in init_frame().
232 delete[] ud->data_copy_malloc;
// The buffer must be bound in order to unmap it; unbind before deleting.
234 GLuint pbo = ud->pbo;
235 glBindBuffer(buffer, pbo);
237 glUnmapBuffer(buffer);
239 glBindBuffer(buffer, 0);
241 glDeleteBuffers(1, &pbo);
// Delete both fields' textures for whichever planes this format uses.
243 switch (ud->pixel_format) {
244 case bmusb::PixelFormat_10BitYCbCr:
245 glDeleteTextures(2, ud->tex_v210);
247 glDeleteTextures(2, ud->tex_444);
250 case bmusb::PixelFormat_8BitYCbCr:
251 glDeleteTextures(2, ud->tex_y);
253 glDeleteTextures(2, ud->tex_cbcr);
256 case bmusb::PixelFormat_8BitBGRA:
257 glDeleteTextures(2, ud->tex_rgba);
260 case bmusb::PixelFormat_8BitYCbCrPlanar:
261 glDeleteTextures(2, ud->tex_y);
263 glDeleteTextures(2, ud->tex_cb);
265 glDeleteTextures(2, ud->tex_cr);
// Frames from a previous configuration are tracked in lingering_generations;
// the entry must exist for any generation other than the current one.
272 if (ud->generation != generation) {
273 auto it = lingering_generations.find(ud->generation);
274 assert(it != lingering_generations.end());
275 if (--it->second.num_frames_left == 0) {
276 lingering_generations.erase(it); // Deallocates the userdata block.
280 //static int sumsum = 0;
// Hands out a frame from the freelist. Reports an overrun on stdout when the
// freelist is empty. The frame's data_copy pointer is aimed at the frame's
// heap copy buffer when an MJPEG encoder is attached and wants data for this
// card; otherwise it is set to nullptr.
282 bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame()
// The freelist is guarded by freelist_mutex.
286 lock_guard<mutex> lock(freelist_mutex); // Meh.
287 if (freelist.empty()) {
288 printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
290 //fprintf(stderr, "freelist has %d allocated\n", ++sumsum);
291 vf = freelist.front();
292 freelist.pop(); // Meh.
// Only set up the copy destination if the encoder will actually consume it
// and the frame carries userdata.
297 if (mjpeg_encoder != nullptr &&
298 mjpeg_encoder->should_encode_mjpeg_for_card(card_index) &&
299 vf.userdata != nullptr) {
300 Userdata *ud = (Userdata *)vf.userdata;
301 vf.data_copy = ud->data_copy_malloc;
// Remember where data_copy points so it can be told apart from a VA-API mapping.
302 ud->data_copy_current_src = Userdata::FROM_MALLOC;
304 vf.data_copy = nullptr;
// Hands out a frame able to hold an image of the given geometry.
//
// Parameters:
//   width, height  Image dimensions in pixels.
//   stride         Bytes per image row.
//
// The frame is taken from the freelist; if its buffer is too small for
// height * stride bytes, or much larger than needed, it is reinitialized via
// init_frame() with a better-fitting size. Requests exceeding the stride,
// height or MAX_FRAME_SIZE limits are rejected by the guard below, and an
// empty freelist is reported as an overrun on stdout.
//
// When an MJPEG encoder is attached and wants data for this card, data_copy
// is pointed either straight into a mapped VA-API image (if the encoder uses
// VA-API and its pitch equals stride) or at the frame's heap copy buffer;
// otherwise data_copy is nullptr.
310 bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride)
// A strided image occupies one stride per row, i.e. height * stride bytes.
// (Multiplying by width instead would undersize the buffer for any frame
// taller than it is wide.)
314 size_t desired_frame_bytes = height * stride;
315 if (stride > 8192 * 4 || height > 8192 || desired_frame_bytes > MAX_FRAME_SIZE) {
320 lock_guard<mutex> lock(freelist_mutex);
321 if (freelist.empty()) {
322 printf("Frame overrun (no more spare PBO frames), dropping frame!\n");
327 vf = freelist.front();
// Frames on the freelist always belong to the current generation.
332 Userdata *userdata = (Userdata *)vf.userdata;
333 assert(generation == userdata->generation);
334 if (vf.size < desired_frame_bytes || (vf.size > FRAME_SIZE && vf.size > desired_frame_bytes * 2)) {
335 // Frame is either too small or way too large, so reallocate it.
336 // Note that width and height now automatically become the right size
337 // (the one we just asked for, instead of the default for the allocator,
338 // which is generally the global resolution); it doesn't matter
339 // for correctness, since we'll recreate the texture on upload if needed,
340 // but it is nice to save that step.
342 init_frame(vf, userdata, this, pixel_format, std::max<size_t>(desired_frame_bytes, FRAME_SIZE), width, height, permissions, map_bits, buffer, generation);
348 if (mjpeg_encoder != nullptr &&
349 mjpeg_encoder->should_encode_mjpeg_for_card(card_index)) {
350 if (mjpeg_encoder->using_vaapi()) {
351 VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
352 VAResourcePool::VAResources resources = mjpeg_encoder->get_va_pool()->get_va_resources(width, height, VA_FOURCC_UYVY); // Only used by DeckLinkCapture, so always 4:2:2.
// Guard object tied to the pool and these resources; it is moved into the
// userdata below if the resources are kept.
353 ReleaseVAResources release(mjpeg_encoder->get_va_pool(), resources);
// Writing directly into the VA image only works if both sides agree on the
// row pitch.
355 if (resources.image.pitches[0] == stride) {
356 userdata->va_resources = move(resources);
357 userdata->va_resources_release = move(release);
359 VAStatus va_status = vaMapBuffer(va_dpy, resources.image.buf, (void **)&vf.data_copy);
360 CHECK_VASTATUS(va_status, "vaMapBuffer");
// Skip to the start of the first plane within the mapped buffer.
361 vf.data_copy += resources.image.offsets[0];
362 userdata->data_copy_current_src = Userdata::FROM_VA_API;
364 printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, resources.image.pitches[0]);
// Pitch mismatch: fall back to the heap copy buffer.
365 vf.data_copy = userdata->data_copy_malloc;
366 userdata->data_copy_current_src = Userdata::FROM_MALLOC;
// No VA-API: the copy goes through the heap buffer.
369 vf.data_copy = userdata->data_copy_malloc;
370 userdata->data_copy_current_src = Userdata::FROM_MALLOC;
373 vf.data_copy = nullptr;
// Takes a frame back from a consumer. Reports any overflow recorded on the
// frame, releases VA-API resources still attached to the frame's userdata
// (unmapping the VA buffer if data_copy pointed into it), and then either
// returns the frame to the freelist (current generation) or destroys it
// (older generation).
379 void PBOFrameAllocator::release_frame(Frame frame)
381 if (frame.overflow > 0) {
382 printf("%d bytes overflow after last (PBO) frame\n", int(frame.overflow));
386 // Poison the page. (Note that this might be bogus if you don't have an OpenGL context.)
// NOTE(review): this section only touches tex_y/tex_cbcr, whatever the
// frame's pixel format is — confirm whether it is compiled in at all.
387 memset(frame.data, 0, frame.size);
388 Userdata *userdata = (Userdata *)frame.userdata;
389 for (unsigned field = 0; field < 2; ++field) {
390 glBindTexture(GL_TEXTURE_2D, userdata->tex_y[field]);
392 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
394 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
396 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
398 glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, userdata->last_width[field], userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
401 glBindTexture(GL_TEXTURE_2D, userdata->tex_cbcr[field]);
403 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
405 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
407 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// NOTE(review): init_frame() specifies the GL_RG8 chroma texture with
// format GL_RG; GL_RED is used here. No pixel data is passed — confirm
// whether the difference is intentional.
409 glTexImage2D(GL_TEXTURE_2D, 0, GL_RG8, userdata->last_width[field] / 2, userdata->last_height[field], 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
415 // In case we never got to upload the frame to MJPEGEncoder.
416 Userdata *userdata = (Userdata *)frame.userdata;
// Move both out of the userdata into locals, leaving the userdata's copies
// in a moved-from state; the locals go out of scope with this section.
417 VAResourcePool::VAResources resources __attribute__((unused)) = move(userdata->va_resources);
418 ReleaseVAResources release = move(userdata->va_resources_release);
// data_copy is a VA-API mapping only if create_frame() marked it as such.
420 if (frame.data_copy != nullptr && userdata->data_copy_current_src == Userdata::FROM_VA_API) {
421 VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy;
422 VAStatus va_status = vaUnmapBuffer(va_dpy, resources.image.buf);
423 CHECK_VASTATUS(va_status, "vaUnmapBuffer");
425 frame.data_copy = nullptr;
// The freelist is guarded by freelist_mutex.
429 lock_guard<mutex> lock(freelist_mutex);
430 Userdata *userdata = (Userdata *)frame.userdata;
// Only frames from the current generation are recycled; frames from an
// older configuration are destroyed instead.
431 if (userdata->generation == generation) {
432 freelist.push(frame);
434 destroy_frame(&frame);
// Switches the allocator to a new configuration. The parameters are compared
// against the current settings first (see the condition below). Otherwise
// the current Userdata block is parked in lingering_generations under the
// current generation number, every frame on the freelist is destroyed, the
// new settings are stored, and a fresh set of num_queued_frames frames is
// initialized and pushed onto the freelist.
439 void PBOFrameAllocator::reconfigure(bmusb::PixelFormat pixel_format,
441 GLuint width, GLuint height,
443 MJPEGEncoder *mjpeg_encoder,
444 size_t num_queued_frames,
// True only when every setting matches the current configuration.
449 if (pixel_format == this->pixel_format &&
450 frame_size == this->frame_size &&
451 width == this->width && height == this->height &&
452 card_index == this->card_index &&
453 mjpeg_encoder == this->mjpeg_encoder &&
454 num_queued_frames == this->num_queued_frames &&
455 buffer == this->buffer &&
456 permissions == this->permissions &&
457 map_bits == this->map_bits) {
461 lock_guard<mutex> lock(freelist_mutex);
// Keep the old Userdata block alive, together with the old frame count, so
// destroy_frame() can count down frames of this generation as they go away.
462 lingering_generations[generation] = LingeringGeneration{ move(userdata), this->num_queued_frames };
// Frames currently on the freelist can be destroyed right away.
465 while (!freelist.empty()) {
466 Frame frame = freelist.front();
468 destroy_frame(&frame);
471 this->pixel_format = pixel_format;
472 this->frame_size = frame_size;
474 this->height = height;
475 this->card_index = card_index;
476 this->mjpeg_encoder = mjpeg_encoder;
477 this->num_queued_frames = num_queued_frames;
478 this->buffer = buffer;
479 this->permissions = permissions;
480 this->map_bits = map_bits;
// Build the frames for the new configuration, one Userdata slot per frame.
482 userdata.reset(new Userdata[num_queued_frames]);
483 for (size_t i = 0; i < num_queued_frames; ++i) {
485 init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation);
486 freelist.push(frame);
489 // There may still be frames out with the old configuration
490 // (for instance, living in GLWidget); they will be destroyed
491 // when they come back in release_frame().