X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fmjpeg_encoder.cpp;h=01173e0ac1ed723b6740ce4e5340f3447ca3742e;hb=575f6eb1b052bb1291987753b1a8cccc7f1e3ab3;hp=07e302c4e93148c2b9a83edfc1034e5465f1e8bc;hpb=0b776ba19a0b0703f87e5529b2e4f82af6b50435;p=nageru diff --git a/nageru/mjpeg_encoder.cpp b/nageru/mjpeg_encoder.cpp index 07e302c..01173e0 100644 --- a/nageru/mjpeg_encoder.cpp +++ b/nageru/mjpeg_encoder.cpp @@ -28,13 +28,9 @@ extern "C" { using namespace bmusb; using namespace std; -extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height); +static VAImageFormat uyvy_format; -#define CHECK_VASTATUS(va_status, func) \ - if (va_status != VA_STATUS_SUCCESS) { \ - fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ - exit(1); \ - } +extern void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height); // From libjpeg (although it's of course identical between implementations). static const int jpeg_natural_order[DCTSIZE2] = { @@ -244,6 +240,7 @@ unique_ptr MJPEGEncoder::try_open_va(const string &va_disp return nullptr; } + // TODO: Unify with the code in Futatabi. int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy); assert(num_formats > 0); @@ -256,6 +253,19 @@ unique_ptr MJPEGEncoder::try_open_va(const string &va_disp return nullptr; } + bool found = false; + for (int i = 0; i < num_formats; ++i) { + if (formats[i].fourcc == VA_FOURCC_UYVY) { + memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat)); + found = true; + break; + } + } + if (!found) { + if (error != nullptr) *error = "UYVY format not found"; + return nullptr; + } + return va_dpy; } @@ -294,6 +304,34 @@ void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFram any_frames_to_be_encoded.notify_all(); } +void MJPEGEncoder::finish_frame(RefCountedFrame frame) +{ + PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata; + + if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) { + VAResources resources __attribute__((unused)) = move(userdata->va_resources); + ReleaseVAResources release = move(userdata->va_resources_release); + + VAStatus va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); + } +} + +int MJPEGEncoder::get_mjpeg_stream_for_card(unsigned card_index) +{ + // Only bother doing MJPEG encoding if there are any connected clients + // that want the stream. + if (httpd->get_num_connected_multicam_clients() == 0) { + return -1; + } + + auto it = global_flags.card_to_mjpeg_stream_export.find(card_index); + if (it == global_flags.card_to_mjpeg_stream_export.end()) { + return -1; + } + return it->second; +} + void MJPEGEncoder::encoder_thread_func() { pthread_setname_np(pthread_self(), "MJPEG_Encode"); @@ -396,6 +434,9 @@ MJPEGEncoder::VAResources MJPEGEncoder::get_va_resources(unsigned width, unsigne va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAEncCodedBufferType, width * height * 3 + 8192, 1, nullptr, &ret.data_buffer); CHECK_VASTATUS(va_status, "vaCreateBuffer"); + va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image); + CHECK_VASTATUS(va_status, "vaCreateImage"); + return ret; } @@ -597,11 +638,20 @@ MJPEGEncoder::VAData MJPEGEncoder::get_va_data_for_resolution(unsigned width, un void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf) { + PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)qf.frame->userdata; unsigned width = qf.video_format.width; unsigned height = qf.video_format.height; - VAResources resources = get_va_resources(width, height); - ReleaseVAResources release(this, resources); + VAResources resources; + ReleaseVAResources release; + if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) { + resources = move(userdata->va_resources); + release = move(userdata->va_resources_release); + } else { + assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC); + resources = get_va_resources(width, height); + release = ReleaseVAResources(this, resources); + } VAData va_data = get_va_data_for_resolution(width, height); va_data.pic_param.coded_buf = resources.data_buffer; @@ -626,27 +676,36 @@ void MJPEGEncoder::encode_jpeg_va(QueuedFrame &&qf) CHECK_VASTATUS(va_status, "vaCreateBuffer"); VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer); - VAImage image; - va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image); - CHECK_VASTATUS(va_status, "vaDeriveImage"); + if (userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_VA_API) { + va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); + // The pixel data is already put into the image by the caller. + } else { + assert(userdata->data_copy_current_src == PBOFrameAllocator::Userdata::FROM_MALLOC); - // Upload the pixel data. - uint8_t *surface_p = nullptr; - vaMapBuffer(va_dpy->va_dpy, image.buf, (void **)&surface_p); + // Upload the pixel data. + uint8_t *surface_p = nullptr; + vaMapBuffer(va_dpy->va_dpy, resources.image.buf, (void **)&surface_p); - size_t field_start_line = qf.video_format.extra_lines_top; // No interlacing support. - size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2; + size_t field_start_line = qf.video_format.extra_lines_top; // No interlacing support. + size_t field_start = qf.cbcr_offset * 2 + qf.video_format.width * field_start_line * 2; - { - const uint8_t *src = qf.frame->data_copy + field_start; - uint8_t *dst = (unsigned char *)surface_p + image.offsets[0]; - memcpy_with_pitch(dst, src, qf.video_format.width * 2, image.pitches[0], qf.video_format.height); + { + const uint8_t *src = qf.frame->data_copy + field_start; + uint8_t *dst = (unsigned char *)surface_p + resources.image.offsets[0]; + memcpy_with_pitch(dst, src, qf.video_format.width * 2, resources.image.pitches[0], qf.video_format.height); + } + + va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf); + CHECK_VASTATUS(va_status, "vaUnmapBuffer"); } - va_status = vaUnmapBuffer(va_dpy->va_dpy, image.buf); - CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - va_status = vaDestroyImage(va_dpy->va_dpy, image.image_id); - CHECK_VASTATUS(va_status, "vaDestroyImage"); + // Seemingly vaPutImage() (which triggers a GPU copy) is much nicer to the + // CPU than vaDeriveImage() and copying directly into the GPU's buffers. + // Exactly why is unclear, but it seems to involve L3 cache usage when there + // are many high-res (1080p+) images in play. + va_status = vaPutImage(va_dpy->va_dpy, resources.surface, resources.image.image_id, 0, 0, width, height, 0, 0, width, height); + CHECK_VASTATUS(va_status, "vaPutImage"); // Finally, stick in the JPEG header. VAEncPackedHeaderParameterBuffer header_parm;