From e131edb9e568383e2700666c6f91394ce895b07f Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 29 May 2020 22:27:43 +0200 Subject: [PATCH] In Futatabi, support 4:2:0 MJPEG decoding via VA-API. This was fairly easy now that all the VA-API code was unified with Nageru, which already supported 4:2:0. --- futatabi/vaapi_jpeg_decoder.cpp | 70 +++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/futatabi/vaapi_jpeg_decoder.cpp b/futatabi/vaapi_jpeg_decoder.cpp index 8fe1ac8..ff4c89e 100644 --- a/futatabi/vaapi_jpeg_decoder.cpp +++ b/futatabi/vaapi_jpeg_decoder.cpp @@ -29,6 +29,20 @@ using namespace std; +// Copies height rows of width bytes each from a source whose rows are src_pitch bytes apart into a tightly packed destination (rows width bytes apart). TODO: Deduplicate between Nageru and this. +static void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t width, size_t src_pitch, size_t height) +{ + if (width == src_pitch) { + memcpy(dst, src, width * height); + } else { + for (size_t y = 0; y < height; ++y) { + const uint8_t *sptr = src + y * src_pitch; + uint8_t *dptr = dst + y * width; + memcpy(dptr, sptr, width); + } + } +} + static unique_ptr va_dpy; static unique_ptr va_pool; @@ -171,12 +185,22 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); return nullptr; } - if (dinfo.comp_info[0].h_samp_factor != 2 || - dinfo.comp_info[1].h_samp_factor != 1 || - dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor || - dinfo.comp_info[2].h_samp_factor != 1 || - dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) { - fprintf(stderr, "Not 4:2:2. 
(Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", + + const bool is_422 = + dinfo.comp_info[0].h_samp_factor == 2 && + dinfo.comp_info[1].h_samp_factor == 1 && + dinfo.comp_info[1].v_samp_factor == dinfo.comp_info[0].v_samp_factor && + dinfo.comp_info[2].h_samp_factor == 1 && + dinfo.comp_info[2].v_samp_factor == dinfo.comp_info[0].v_samp_factor; + const bool is_420 = + dinfo.comp_info[0].h_samp_factor == 2 && + dinfo.comp_info[0].v_samp_factor == 2 && + dinfo.comp_info[1].h_samp_factor == 1 && + dinfo.comp_info[1].v_samp_factor == 1 && + dinfo.comp_info[2].h_samp_factor == 1 && + dinfo.comp_info[2].v_samp_factor == 1; + if (!is_422 && !is_420) { + fprintf(stderr, "Not 4:2:2 or 4:2:0. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); @@ -199,7 +223,7 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) pic_param.color_space = 0; // YUV. pic_param.rotation = VA_ROTATION_NONE; - VAResourcePool::VAResources resources = va_pool->get_va_resources(dinfo.image_width, dinfo.image_height, VA_FOURCC_UYVY); + VAResourcePool::VAResources resources = va_pool->get_va_resources(dinfo.image_width, dinfo.image_height, is_422 ? 
VA_FOURCC_UYVY : VA_FOURCC_NV12); ReleaseVAResources release(va_pool.get(), resources); VABufferID pic_param_buffer; @@ -374,22 +398,34 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) uint8_t *y_pix = pbo.ptr; uint8_t *cbcr_pix = pbo.ptr + cbcr_offset; - const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0]; - if (resources.image.pitches[0] == dinfo.image_width * 2) { - memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2); - } else { - for (unsigned y = 0; y < dinfo.image_height; ++y) { - memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width, - src + y * resources.image.pitches[0], dinfo.image_width * 2); + unsigned cbcr_width = dinfo.image_width / 2; + unsigned cbcr_height; + if (is_422) { + const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0]; + if (resources.image.pitches[0] == dinfo.image_width * 2) { + memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2); + } else { + for (unsigned y = 0; y < dinfo.image_height; ++y) { + memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width, + src + y * resources.image.pitches[0], dinfo.image_width * 2); + } } + cbcr_height = dinfo.image_height; + } else { + assert(is_420); + const uint8_t *src_y = (const uint8_t *)mapped + resources.image.offsets[0]; + const uint8_t *src_cbcr = (const uint8_t *)mapped + resources.image.offsets[1]; + memcpy_with_pitch(y_pix, src_y, dinfo.image_width, resources.image.pitches[0], dinfo.image_height); + memcpy_with_pitch(cbcr_pix, src_cbcr, dinfo.image_width, resources.image.pitches[1], dinfo.image_height / 2); + cbcr_height = dinfo.image_height / 2; } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo); frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0)); - frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, 
BUFFER_OFFSET(cbcr_offset)); + frame->cbcr = create_texture_2d(cbcr_width, cbcr_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset)); + glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + cbcr_width * cbcr_height * 2); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2); glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); frame->uploaded_ui_thread = pbo.upload_done; @@ -399,7 +435,7 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) frame->width = dinfo.image_width; frame->height = dinfo.image_height; frame->chroma_subsampling_x = 2; - frame->chroma_subsampling_y = 1; + frame->chroma_subsampling_y = is_420 ? 2 : 1; if (dinfo.marker_list != nullptr && dinfo.marker_list->marker == JPEG_APP0 + 1 && -- 2.39.2