X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fpbo_frame_allocator.cpp;h=709a2bf359e505ab7e089f567fce33b502427834;hb=HEAD;hp=6211937b701f21dc2bdfbe916781f71f4629fb6b;hpb=b44bf7cfce6a5aaffbcd1e37df39068a163438ad;p=nageru diff --git a/nageru/pbo_frame_allocator.cpp b/nageru/pbo_frame_allocator.cpp index 6211937..6ebe13a 100644 --- a/nageru/pbo_frame_allocator.cpp +++ b/nageru/pbo_frame_allocator.cpp @@ -1,14 +1,22 @@ #include "pbo_frame_allocator.h" #include +#include +#include #include +#include #include #include #include #include +#include +#include -#include "flags.h" +#include "mjpeg_encoder.h" +#include "defs.h" +#include "shared/va_resource_pool.h" #include "v210_converter.h" +#include "shared/va_display.h" using namespace std; @@ -26,12 +34,23 @@ void set_clamp_to_edge() } // namespace -PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits) - : pixel_format(pixel_format), buffer(buffer) +PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, unsigned card_index, MJPEGEncoder *mjpeg_encoder, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits) + : card_index(card_index), + mjpeg_encoder(mjpeg_encoder), + pixel_format(pixel_format), + buffer(buffer), + frame_size(frame_size), + num_queued_frames(num_queued_frames), + width(width), + height(height), + permissions(permissions), + map_bits(map_bits) { userdata.reset(new Userdata[num_queued_frames]); for (size_t i = 0; i < num_queued_frames; ++i) { - init_frame(i, frame_size, width, height, permissions, map_bits); + Frame frame; + init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation); + freelist.push(frame); } glBindBuffer(buffer, 0); check_error(); @@ -39,7 +58,7 @@ PBOFrameAllocator::PBOFrameAllocator(bmusb::PixelFormat pixel_format, size_t fra check_error(); } -void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits) +void PBOFrameAllocator::init_frame(Frame &frame, Userdata *ud, PBOFrameAllocator *owner, bmusb::PixelFormat pixel_format, size_t frame_size, GLuint width, GLuint height, GLenum permissions, GLenum map_bits, GLenum buffer, int generation) { GLuint pbo; glGenBuffers(1, &pbo); @@ -49,16 +68,16 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w glBufferStorage(buffer, frame_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT); check_error(); - Frame frame; frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT); frame.data2 = frame.data + frame_size / 2; - frame.data_copy = new uint8_t[frame_size]; check_error(); frame.size = frame_size; - frame.userdata = &userdata[frame_idx]; - userdata[frame_idx].pbo = pbo; - userdata[frame_idx].pixel_format = pixel_format; - frame.owner = this; + frame.userdata = ud; + ud->generation = generation; + ud->pbo = pbo; + ud->pixel_format = pixel_format; + ud->data_copy_malloc = new uint8_t[frame_size]; + frame.owner = owner; // For 8-bit non-planar Y'CbCr, we ask the driver to split Y' and Cb/Cr // into separate textures. For 10-bit, the input format (v210) @@ -72,48 +91,48 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w // resolution is progressive. switch (pixel_format) { case bmusb::PixelFormat_8BitYCbCr: - glGenTextures(2, userdata[frame_idx].tex_y); + glGenTextures(2, ud->tex_y); check_error(); - glGenTextures(2, userdata[frame_idx].tex_cbcr); + glGenTextures(2, ud->tex_cbcr); check_error(); break; case bmusb::PixelFormat_10BitYCbCr: - glGenTextures(2, userdata[frame_idx].tex_v210); + glGenTextures(2, ud->tex_v210); check_error(); - glGenTextures(2, userdata[frame_idx].tex_444); + glGenTextures(2, ud->tex_444); check_error(); break; case bmusb::PixelFormat_8BitBGRA: - glGenTextures(2, userdata[frame_idx].tex_rgba); + glGenTextures(2, ud->tex_rgba); check_error(); break; case bmusb::PixelFormat_8BitYCbCrPlanar: - glGenTextures(2, userdata[frame_idx].tex_y); + glGenTextures(2, ud->tex_y); check_error(); - glGenTextures(2, userdata[frame_idx].tex_cb); + glGenTextures(2, ud->tex_cb); check_error(); - glGenTextures(2, userdata[frame_idx].tex_cr); + glGenTextures(2, ud->tex_cr); check_error(); break; default: assert(false); } - userdata[frame_idx].last_width[0] = width; - userdata[frame_idx].last_height[0] = height; - userdata[frame_idx].last_cbcr_width[0] = width / 2; - userdata[frame_idx].last_cbcr_height[0] = height; - userdata[frame_idx].last_v210_width[0] = 0; - - userdata[frame_idx].last_width[1] = 0; - userdata[frame_idx].last_height[1] = 0; - userdata[frame_idx].last_cbcr_width[1] = 0; - userdata[frame_idx].last_cbcr_height[1] = 0; - userdata[frame_idx].last_v210_width[1] = 0; - - userdata[frame_idx].last_interlaced = false; - userdata[frame_idx].last_has_signal = false; - userdata[frame_idx].last_is_connected = false; + ud->last_width[0] = width; + ud->last_height[0] = height; + ud->last_cbcr_width[0] = width / 2; + ud->last_cbcr_height[0] = height; + ud->last_v210_width[0] = 0; + + ud->last_width[1] = 0; + ud->last_height[1] = 0; + ud->last_cbcr_width[1] = 0; + ud->last_cbcr_height[1] = 0; + ud->last_v210_width[1] = 0; + + ud->last_interlaced = false; + ud->last_has_signal = false; + ud->last_is_connected = false; for (unsigned field = 0; field < 2; ++field) { switch (pixel_format) { case bmusb::PixelFormat_10BitYCbCr: { @@ -122,17 +141,17 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w // Seemingly we need to set the minification filter even though // shader image loads don't use them, or NVIDIA will just give us // zero back. - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_v210[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_v210[field]); check_error(); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); check_error(); if (field == 0) { - userdata[frame_idx].last_v210_width[0] = v210_width; + ud->last_v210_width[0] = v210_width; glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB10_A2, v210_width, height, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr); check_error(); } - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_444[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_444[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -142,7 +161,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w break; } case bmusb::PixelFormat_8BitYCbCr: - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_y[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -150,7 +169,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w check_error(); } - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_cbcr[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_cbcr[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -159,20 +178,16 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w } break; case bmusb::PixelFormat_8BitBGRA: - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_rgba[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_rgba[field]); check_error(); set_clamp_to_edge(); if (field == 0) { - if (global_flags.can_disable_srgb_decoder) { // See the comments in tweaked_inputs.h. - glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr); - } else { - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr); - } + glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr); check_error(); } break; case bmusb::PixelFormat_8BitYCbCrPlanar: - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_y[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_y[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -180,7 +195,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w check_error(); } - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_cb[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_cb[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -188,7 +203,7 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w check_error(); } - glBindTexture(GL_TEXTURE_2D, userdata[frame_idx].tex_cr[field]); + glBindTexture(GL_TEXTURE_2D, ud->tex_cr[field]); check_error(); set_clamp_to_edge(); if (field == 0) { @@ -200,8 +215,6 @@ void PBOFrameAllocator::init_frame(size_t frame_idx, size_t frame_size, GLuint w assert(false); } } - - freelist.push(frame); } PBOFrameAllocator::~PBOFrameAllocator() @@ -215,9 +228,10 @@ PBOFrameAllocator::~PBOFrameAllocator() void PBOFrameAllocator::destroy_frame(Frame *frame) { - delete[] frame->data_copy; + Userdata *ud = (Userdata *)frame->userdata; + delete[] ud->data_copy_malloc; - GLuint pbo = ((Userdata *)frame->userdata)->pbo; + GLuint pbo = ud->pbo; glBindBuffer(buffer, pbo); check_error(); glUnmapBuffer(buffer); @@ -226,40 +240,48 @@ void PBOFrameAllocator::destroy_frame(Frame *frame) check_error(); glDeleteBuffers(1, &pbo); check_error(); - switch (pixel_format) { + switch (ud->pixel_format) { case bmusb::PixelFormat_10BitYCbCr: - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_v210); + glDeleteTextures(2, ud->tex_v210); check_error(); - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_444); + glDeleteTextures(2, ud->tex_444); check_error(); break; case bmusb::PixelFormat_8BitYCbCr: - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_y); + glDeleteTextures(2, ud->tex_y); check_error(); - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_cbcr); + glDeleteTextures(2, ud->tex_cbcr); check_error(); break; case bmusb::PixelFormat_8BitBGRA: - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_rgba); + glDeleteTextures(2, ud->tex_rgba); check_error(); break; case bmusb::PixelFormat_8BitYCbCrPlanar: - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_y); + glDeleteTextures(2, ud->tex_y); check_error(); - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_cb); + glDeleteTextures(2, ud->tex_cb); check_error(); - glDeleteTextures(2, ((Userdata *)frame->userdata)->tex_cr); + glDeleteTextures(2, ud->tex_cr); check_error(); break; default: assert(false); } + + if (ud->generation != generation) { + auto it = lingering_generations.find(ud->generation); + assert(it != lingering_generations.end()); + if (--it->second.num_frames_left == 0) { + lingering_generations.erase(it); // Deallocates the userdata block. + } + } } //static int sumsum = 0; bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame() { - Frame vf; + Frame vf; lock_guard lock(freelist_mutex); // Meh. if (freelist.empty()) { @@ -271,6 +293,86 @@ bmusb::FrameAllocator::Frame PBOFrameAllocator::alloc_frame() } vf.len = 0; vf.overflow = 0; + + if (mjpeg_encoder != nullptr && + mjpeg_encoder->should_encode_mjpeg_for_card(card_index) && + vf.userdata != nullptr) { + Userdata *ud = (Userdata *)vf.userdata; + vf.data_copy = ud->data_copy_malloc; + ud->data_copy_current_src = Userdata::FROM_MALLOC; + } else { + vf.data_copy = nullptr; + } + + return vf; +} + +bmusb::FrameAllocator::Frame PBOFrameAllocator::create_frame(size_t width, size_t height, size_t stride) +{ + Frame vf; + + size_t desired_frame_bytes = width * stride; + if (stride > 8192 * 4 || height > 8192 || desired_frame_bytes > MAX_FRAME_SIZE) { + return vf; + } + + { + lock_guard lock(freelist_mutex); + if (freelist.empty()) { + printf("Frame overrun (no more spare PBO frames), dropping frame!\n"); + vf.len = 0; + vf.overflow = 0; + return vf; + } else { + vf = freelist.front(); + freelist.pop(); + } + } + + Userdata *userdata = (Userdata *)vf.userdata; + assert(generation == userdata->generation); + if (vf.size < desired_frame_bytes || (vf.size > FRAME_SIZE && vf.size > desired_frame_bytes * 2)) { + // Frame is either too small or way too large, so reallocate it. + // Note that width and height now automatically becomes the right size + // (the one we just asked for, instead of the default for the allocator, + // which is generally the global resolution); it doesn't matter + // for correctness, since we'll recreate the texture on upload if needed, + // but it is nice to save that step. + destroy_frame(&vf); + init_frame(vf, userdata, this, pixel_format, std::max(desired_frame_bytes, FRAME_SIZE), width, height, permissions, map_bits, buffer, generation); + }; + + vf.len = 0; + vf.overflow = 0; + + if (mjpeg_encoder != nullptr && + mjpeg_encoder->should_encode_mjpeg_for_card(card_index)) { + if (mjpeg_encoder->using_vaapi()) { + VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy; + VAResourcePool::VAResources resources = mjpeg_encoder->get_va_pool()->get_va_resources(width, height, VA_FOURCC_UYVY); // Only used by DeckLinkCapture, so always 4:2:2. + ReleaseVAResources release(mjpeg_encoder->get_va_pool(), resources); + + if (resources.image.pitches[0] == stride) { + userdata->va_resources = move(resources); + userdata->va_resources_release = move(release); + + VAStatus va_status = vaMapBuffer(va_dpy, resources.image.buf, (void **)&vf.data_copy); + CHECK_VASTATUS(va_status, "vaMapBuffer"); + vf.data_copy += resources.image.offsets[0]; + userdata->data_copy_current_src = Userdata::FROM_VA_API; + } else { + printf("WARNING: Could not copy directly into VA-API MJPEG buffer for %zu x %zu, since producer and consumer disagreed on stride (%zu != %d).\n", width, height, stride, resources.image.pitches[0]); + vf.data_copy = userdata->data_copy_malloc; + userdata->data_copy_current_src = Userdata::FROM_MALLOC; + } + } else { + vf.data_copy = userdata->data_copy_malloc; + userdata->data_copy_current_src = Userdata::FROM_MALLOC; + } + } else { + vf.data_copy = nullptr; + } + return vf; } @@ -309,7 +411,82 @@ void PBOFrameAllocator::release_frame(Frame frame) } #endif + { + // In case we never got to upload the frame to MJPEGEncoder. + Userdata *userdata = (Userdata *)frame.userdata; + VAResourcePool::VAResources resources __attribute__((unused)) = move(userdata->va_resources); + ReleaseVAResources release = move(userdata->va_resources_release); + + if (frame.data_copy != nullptr && userdata->data_copy_current_src == Userdata::FROM_VA_API) { + VADisplay va_dpy = mjpeg_encoder->va_dpy->va_dpy; + VAStatus va_status = vaUnmapBuffer(va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); + + frame.data_copy = nullptr; + } + } + lock_guard lock(freelist_mutex); - freelist.push(frame); + Userdata *userdata = (Userdata *)frame.userdata; + if (userdata->generation == generation) { + freelist.push(frame); + } else { + destroy_frame(&frame); + } //--sumsum; } + +void PBOFrameAllocator::reconfigure(bmusb::PixelFormat pixel_format, + size_t frame_size, + GLuint width, GLuint height, + unsigned card_index, + MJPEGEncoder *mjpeg_encoder, + size_t num_queued_frames, + GLenum buffer, + GLenum permissions, + GLenum map_bits) +{ + if (pixel_format == this->pixel_format && + frame_size == this->frame_size && + width == this->width && height == this->height && + card_index == this->card_index && + mjpeg_encoder == this->mjpeg_encoder && + num_queued_frames == this->num_queued_frames && + buffer == this->buffer && + permissions == this->permissions && + map_bits == this->map_bits) { + return; + } + + lock_guard lock(freelist_mutex); + lingering_generations[generation] = LingeringGeneration{ move(userdata), this->num_queued_frames }; + ++generation; + + while (!freelist.empty()) { + Frame frame = freelist.front(); + freelist.pop(); + destroy_frame(&frame); + } + + this->pixel_format = pixel_format; + this->frame_size = frame_size; + this->width = width; + this->height = height; + this->card_index = card_index; + this->mjpeg_encoder = mjpeg_encoder; + this->num_queued_frames = num_queued_frames; + this->buffer = buffer; + this->permissions = permissions; + this->map_bits = map_bits; + + userdata.reset(new Userdata[num_queued_frames]); + for (size_t i = 0; i < num_queued_frames; ++i) { + Frame frame; + init_frame(frame, &userdata[i], this, pixel_format, frame_size, width, height, permissions, map_bits, buffer, generation); + freelist.push(frame); + } + + // There may still be frames out with the old configuration + // (for instance, living in GLWidget); they will be destroyed + // when they come back in release_frame(). +}