X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fmjpeg_encoder.cpp;h=46bb94c7639112f76b031c96c63acab9560b99ce;hb=9ffd4f03f314cc6e0254449593def95c9bc203d6;hp=9ae018f81059584621bb383b3f4d6b30c01df0f4;hpb=a839022c035b3d9387feabc02843c166ac78b469;p=nageru diff --git a/nageru/mjpeg_encoder.cpp b/nageru/mjpeg_encoder.cpp index 9ae018f..46bb94c 100644 --- a/nageru/mjpeg_encoder.cpp +++ b/nageru/mjpeg_encoder.cpp @@ -28,6 +28,8 @@ extern "C" { using namespace bmusb; using namespace std; +static VAImageFormat uyvy_format; + extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height); // From libjpeg (although it's of course identical between implementations). @@ -129,7 +131,7 @@ MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display) AVStream *stream = avformat_new_stream(avctx.get(), nullptr); if (stream == nullptr) { fprintf(stderr, "avformat_new_stream() failed\n"); - exit(1); + abort(); } stream->time_base = AVRational{ 1, TIMEBASE }; stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; @@ -157,7 +159,7 @@ MJPEGEncoder::MJPEGEncoder(HTTPD *httpd, const string &va_display) } if (avformat_write_header(avctx.get(), &options) < 0) { fprintf(stderr, "avformat_write_header() failed\n"); - exit(1); + abort(); } // Initialize VA-API. @@ -238,6 +240,7 @@ unique_ptr MJPEGEncoder::try_open_va(const string &va_disp return nullptr; } + // TODO: Unify with the code in Futatabi. int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy); assert(num_formats > 0); @@ -250,6 +253,19 @@ unique_ptr MJPEGEncoder::try_open_va(const string &va_disp return nullptr; } + bool found = false; + for (int i = 0; i < num_formats; ++i) { + if (formats[i].fourcc == VA_FOURCC_UYVY) { + memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat)); + found = true; + break; + } + } + if (!found) { + if (error != nullptr) *error = "UYVY format not found"; + return nullptr; + } + return va_dpy; } @@ -288,22 +304,6 @@ void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFram any_frames_to_be_encoded.notify_all(); } -void MJPEGEncoder::finish_frame(RefCountedFrame frame) -{ - PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata; - - if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) { - VAResources resources __attribute__((unused)) = move(userdata->va_resources); - ReleaseVAResources release = move(userdata->va_resources_release); - VAImage image = move(userdata->va_image); - - VAStatus va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf); - CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id); - CHECK_VASTATUS(va_status, "vaDestroyImage"); - } -} - int MJPEGEncoder::get_mjpeg_stream_for_card(unsigned card_index) { // Only bother doing MJPEG encoding if there are any connected clients @@ -367,7 +367,7 @@ void MJPEGEncoder::write_mjpeg_packet(int64_t pts, unsigned card_index, const ui if (av_write_frame(avctx.get(), &pkt) < 0) { fprintf(stderr, "av_write_frame() failed\n"); - exit(1); + abort(); } } @@ -421,13 +421,16 @@ MJPEGEncoder::VAResources MJPEGEncoder::get_va_resources(unsigned width, unsigne va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncCodedBufferType, width * height * 3 + 8192, 1, nullptr, &ret.data_buffer); CHECK_VASTATUS(va_status, "vaCreateBuffer"); + va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image); + CHECK_VASTATUS(va_status, "vaCreateImage"); + return ret; } void MJPEGEncoder::release_va_resources(MJPEGEncoder::VAResources resources) { lock_guard lock(va_resources_mutex); - if (va_resources_freelist.size() > 10) { + if (va_resources_freelist.size() > 50) { auto it = va_resources_freelist.end(); --it; @@ -440,6 +443,9 @@ void MJPEGEncoder::release_va_resources(MJPEGEncoder::VAResources resources) va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1); CHECK_VASTATUS(va_status, "vaDestroySurfaces"); + va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id); + CHECK_VASTATUS(va_status, "vaDestroyImage"); + va_resources_freelist.erase(it); } @@ -660,34 +666,38 @@ void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf) CHECK_VASTATUS(va_status, "vaCreateBuffer"); VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer); - VAImage image; if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) { - // The pixel data is already uploaded by the caller. - image = move(userdata->va_image); + // The pixel data is already put into the image by the caller. + va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); } else { assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC); // Upload the pixel data. - va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image); - CHECK_VASTATUS(va_status, "vaDeriveImage"); - uint8_t *surface_p = nullptr; - vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p); + vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p); size_t field_start_line = qf.video_format.extra_lines_top; // No interlacing support. size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2; { const uint8_t *src = qf.frame->data_copy + field_start; - uint8_t *dst = (unsigned char *)surface_p + image.offsets[0]; - memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height); + uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0]; + memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height); } + + va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); } - va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf); - CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id); - CHECK_VASTATUS(va_status, "vaDestroyImage"); + qf.frame->data_copy = nullptr; + + // Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the + // CPU than vaDeriveImage() and copying directly into the GPU's buffers. + // Exactly why is unclear, but it seems to involve L3 cache usage when there + // are many high-res (1080p+) images in play. + va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height); + CHECK_VASTATUS(va_status, "vaPutImage"); // Finally, stick in the JPEG header. VAEncPackedHeaderParameterBuffer header_parm;