X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=futatabi%2Fvaapi_jpeg_decoder.cpp;h=ff4c89e9d06feb59f9c5d1ca5b9f63b387fd4026;hb=5a95cccfdacb0da1091ad71a1777a3045d7497e4;hp=0441514a8b815efe62509e45490b595cc0a4562b;hpb=0da2817ad80aeab299902c55b306f57376054a7e;p=nageru diff --git a/futatabi/vaapi_jpeg_decoder.cpp b/futatabi/vaapi_jpeg_decoder.cpp index 0441514..ff4c89e 100644 --- a/futatabi/vaapi_jpeg_decoder.cpp +++ b/futatabi/vaapi_jpeg_decoder.cpp @@ -5,6 +5,8 @@ #include "jpeglib_error_wrapper.h" #include "pbo_pool.h" #include "shared/memcpy_interleaved.h" +#include "shared/va_display.h" +#include "shared/va_resource_pool.h" #include #include @@ -27,31 +29,24 @@ using namespace std; -static unique_ptr va_dpy; -static VAConfigID config_id; -static VAImageFormat uyvy_format; -bool vaapi_jpeg_decoding_usable = false; - -struct VAResources { - unsigned width, height; - VASurfaceID surface; - VAContextID context; - VAImage image; -}; -static list va_resources_freelist; -static mutex va_resources_mutex; - -#define CHECK_VASTATUS(va_status, func) \ - if (va_status != VA_STATUS_SUCCESS) { \ - fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ - abort(); \ +// TODO: Deduplicate between Nageru and this. +static void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_t dst_pitch, size_t height) +{ + if (src_width == dst_pitch) { + memcpy(dst, src, src_width * height); + } else { + for (size_t y = 0; y < height; ++y) { + const uint8_t *sptr = src + y * src_width; + uint8_t *dptr = dst + y * dst_pitch; + memcpy(dptr, sptr, src_width); + } } +} -#define CHECK_VASTATUS_RET(va_status, func) \ - if (va_status != VA_STATUS_SUCCESS) { \ - fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ - return nullptr; \ - } +static unique_ptr va_dpy; +static unique_ptr va_pool; + +bool vaapi_jpeg_decoding_usable = false; // From libjpeg (although it's of course identical between implementations). static const int jpeg_natural_order[DCTSIZE2] = { @@ -65,163 +60,24 @@ static const int jpeg_natural_order[DCTSIZE2] = { 53, 60, 61, 54, 47, 55, 62, 63, }; -VAResources get_va_resources(unsigned width, unsigned height) -{ - { - lock_guard lock(va_resources_mutex); - for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) { - if (it->width == width && it->height == height) { - VAResources ret = *it; - va_resources_freelist.erase(it); - return ret; - } - } - } - - VAResources ret; - - ret.width = width; - ret.height = height; - - VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422, - width, height, - &ret.surface, 1, nullptr, 0); - CHECK_VASTATUS(va_status, "vaCreateSurfaces"); - - va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context); - CHECK_VASTATUS(va_status, "vaCreateContext"); - - va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image); - CHECK_VASTATUS(va_status, "vaCreateImage"); - - return ret; -} - -void release_va_resources(VAResources resources) -{ - lock_guard lock(va_resources_mutex); - if (va_resources_freelist.size() > 10) { - auto it = va_resources_freelist.end(); - --it; - - VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id); - CHECK_VASTATUS(va_status, "vaDestroyImage"); - - va_status = vaDestroyContext(va_dpy->va_dpy, it->context); - CHECK_VASTATUS(va_status, "vaDestroyContext"); - - va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1); - CHECK_VASTATUS(va_status, "vaDestroySurfaces"); - - va_resources_freelist.erase(it); - } - - va_resources_freelist.push_front(resources); -} - -// RAII wrapper to release VAResources on return (even on error). -class ReleaseVAResources { -public: - ReleaseVAResources(const VAResources &resources) - : resources(resources) {} - ~ReleaseVAResources() - { - if (!committed) { - release_va_resources(resources); - } - } - - void commit() { committed = true; } - -private: - const VAResources &resources; - bool committed = false; -}; - -VADisplayWithCleanup::~VADisplayWithCleanup() +static unique_ptr try_open_va_mjpeg(const string &va_display) { - if (va_dpy != nullptr) { - vaTerminate(va_dpy); - } - if (x11_display != nullptr) { - XCloseDisplay(x11_display); - } - if (drm_fd != -1) { - close(drm_fd); - } -} - -unique_ptr va_open_display(const string &va_display) -{ - if (va_display.empty() || va_display[0] != '/') { // An X display. - Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str()); - if (x11_display == nullptr) { - fprintf(stderr, "error: can't connect to X server!\n"); - return nullptr; - } - - unique_ptr ret(new VADisplayWithCleanup); - ret->x11_display = x11_display; - ret->va_dpy = vaGetDisplay(x11_display); - if (ret->va_dpy == nullptr) { - return nullptr; - } - return ret; - } else { // A DRM node on the filesystem (e.g. /dev/dri/renderD128). - int drm_fd = open(va_display.c_str(), O_RDWR); - if (drm_fd == -1) { - perror(va_display.c_str()); - return nullptr; - } - unique_ptr ret(new VADisplayWithCleanup); - ret->drm_fd = drm_fd; - ret->va_dpy = vaGetDisplayDRM(drm_fd); - if (ret->va_dpy == nullptr) { - return nullptr; - } - return ret; - } -} - -unique_ptr try_open_va(const string &va_display, string *error) -{ - unique_ptr va_dpy = va_open_display(va_display); + VAConfigID config_id_422, config_id_420; + VAImageFormat uyvy_format, nv12_format; + + // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/ + unique_ptr va_dpy = + try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointVLD, + { { "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format }, + { "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format } }, + /*chosen_profile=*/nullptr, /*error=*/nullptr); if (va_dpy == nullptr) { - if (error) - *error = "Opening VA display failed"; - return nullptr; - } - int major_ver, minor_ver; - VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver); - if (va_status != VA_STATUS_SUCCESS) { - char buf[256]; - snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status); - if (error != nullptr) - *error = buf; - return nullptr; - } - - int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy); - unique_ptr entrypoints(new VAEntrypoint[num_entrypoints]); - if (entrypoints == nullptr) { - if (error != nullptr) - *error = "Failed to allocate memory for VA entry points"; - return nullptr; - } - - vaQueryConfigEntrypoints(va_dpy->va_dpy, VAProfileJPEGBaseline, entrypoints.get(), &num_entrypoints); - for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) { - if (entrypoints[slice_entrypoint] != VAEntrypointVLD) { - continue; - } - - // We found a usable decode, so return it. return va_dpy; } - if (error != nullptr) - *error = "Can't find VAEntrypointVLD for the JPEG profile"; - return nullptr; + va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/false)); + + return va_dpy; } string get_usable_va_display() @@ -235,7 +91,7 @@ string get_usable_va_display() } // First try the default (ie., whatever $DISPLAY is set to). - unique_ptr va_dpy = try_open_va("", nullptr); + unique_ptr va_dpy = try_open_va_mjpeg(""); if (va_dpy != nullptr) { if (need_env_reset) { unsetenv("LIBVA_MESSAGING_LEVEL"); @@ -253,7 +109,7 @@ string get_usable_va_display() } else { for (size_t i = 0; i < g.gl_pathc; ++i) { string path = g.gl_pathv[i]; - va_dpy = try_open_va(path, nullptr); + va_dpy = try_open_va_mjpeg(path); if (va_dpy != nullptr) { fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n", path.c_str()); @@ -280,37 +136,11 @@ void init_jpeg_vaapi() return; } - va_dpy = try_open_va(dpy, nullptr); + va_dpy = try_open_va_mjpeg(dpy); if (va_dpy == nullptr) { return; } - VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 }; - - VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD, - &attr, 1, &config_id); - CHECK_VASTATUS(va_status, "vaCreateConfig"); - - int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy); - assert(num_formats > 0); - - unique_ptr formats(new VAImageFormat[num_formats]); - va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats); - CHECK_VASTATUS(va_status, "vaQueryImageFormats"); - - bool found = false; - for (int i = 0; i < num_formats; ++i) { - // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/ - if (formats[i].fourcc == VA_FOURCC_UYVY) { - memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat)); - found = true; - break; - } - } - if (!found) { - return; - } - fprintf(stderr, "VA-API JPEG decoding initialized.\n"); vaapi_jpeg_decoding_usable = true; } @@ -355,12 +185,22 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); return nullptr; } - if (dinfo.comp_info[0].h_samp_factor != 2 || - dinfo.comp_info[1].h_samp_factor != 1 || - dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor || - dinfo.comp_info[2].h_samp_factor != 1 || - dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) { - fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", + + const bool is_422 = + dinfo.comp_info[0].h_samp_factor == 2 && + dinfo.comp_info[1].h_samp_factor == 1 && + dinfo.comp_info[1].v_samp_factor == dinfo.comp_info[0].v_samp_factor && + dinfo.comp_info[2].h_samp_factor == 1 && + dinfo.comp_info[2].v_samp_factor == dinfo.comp_info[0].v_samp_factor; + const bool is_420 = + dinfo.comp_info[0].h_samp_factor == 2 && + dinfo.comp_info[0].v_samp_factor == 2 && + dinfo.comp_info[1].h_samp_factor == 1 && + dinfo.comp_info[1].v_samp_factor == 1 && + dinfo.comp_info[2].h_samp_factor == 1 && + dinfo.comp_info[2].v_samp_factor == 1; + if (!is_422 && !is_420) { + fprintf(stderr, "Not 4:2:2 or 4:2:0. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); @@ -383,8 +223,8 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) pic_param.color_space = 0; // YUV. pic_param.rotation = VA_ROTATION_NONE; - VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height); - ReleaseVAResources release(resources); + VAResourcePool::VAResources resources = va_pool->get_va_resources(dinfo.image_width, dinfo.image_height, is_422 ? VA_FOURCC_UYVY : VA_FOURCC_NV12); + ReleaseVAResources release(va_pool.get(), resources); VABufferID pic_param_buffer; VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer); @@ -558,22 +398,34 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) uint8_t *y_pix = pbo.ptr; uint8_t *cbcr_pix = pbo.ptr + cbcr_offset; - const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0]; - if (resources.image.pitches[0] == dinfo.image_width * 2) { - memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2); - } else { - for (unsigned y = 0; y < dinfo.image_height; ++y) { - memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width, - src + y * resources.image.pitches[0], dinfo.image_width * 2); + unsigned cbcr_width = dinfo.image_width / 2; + unsigned cbcr_height; + if (is_422) { + const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0]; + if (resources.image.pitches[0] == dinfo.image_width * 2) { + memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2); + } else { + for (unsigned y = 0; y < dinfo.image_height; ++y) { + memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width, + src + y * resources.image.pitches[0], dinfo.image_width * 2); + } } + cbcr_height = dinfo.image_height; + } else { + assert(is_420); + const uint8_t *src_y = (const uint8_t *)mapped + resources.image.offsets[0]; + const uint8_t *src_cbcr = (const uint8_t *)mapped + resources.image.offsets[1]; + memcpy_with_pitch(y_pix, src_y, dinfo.image_width, resources.image.pitches[0], dinfo.image_height); + memcpy_with_pitch(cbcr_pix, src_cbcr, dinfo.image_width, resources.image.pitches[1], dinfo.image_height / 2); + cbcr_height = dinfo.image_height / 2; } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo); frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0)); - frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset)); + frame->cbcr = create_texture_2d(cbcr_width, cbcr_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset)); + glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + cbcr_width * cbcr_height * 2); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2); glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); frame->uploaded_ui_thread = pbo.upload_done; @@ -583,7 +435,7 @@ shared_ptr decode_jpeg_vaapi(const string &jpeg) frame->width = dinfo.image_width; frame->height = dinfo.image_height; frame->chroma_subsampling_x = 2; - frame->chroma_subsampling_y = 1; + frame->chroma_subsampling_y = is_420 ? 2 : 1; if (dinfo.marker_list != nullptr && dinfo.marker_list->marker == JPEG_APP0 + 1 &&