git.sesse.net Git - nageru/blobdiff - futatabi/vaapi_jpeg_decoder.cpp
Fix a dangling reference (found by GCC 14).
[nageru] / futatabi / vaapi_jpeg_decoder.cpp
index 0441514a8b815efe62509e45490b595cc0a4562b..ff4c89e9d06feb59f9c5d1ca5b9f63b387fd4026 100644 (file)
@@ -5,6 +5,8 @@
 #include "jpeglib_error_wrapper.h"
 #include "pbo_pool.h"
 #include "shared/memcpy_interleaved.h"
+#include "shared/va_display.h"
+#include "shared/va_resource_pool.h"
 
 #include <X11/Xlib.h>
 #include <assert.h>
 
 using namespace std;
 
-static unique_ptr<VADisplayWithCleanup> va_dpy;
-static VAConfigID config_id;
-static VAImageFormat uyvy_format;
-bool vaapi_jpeg_decoding_usable = false;
-
-struct VAResources {
-       unsigned width, height;
-       VASurfaceID surface;
-       VAContextID context;
-       VAImage image;
-};
-static list<VAResources> va_resources_freelist;
-static mutex va_resources_mutex;
-
-#define CHECK_VASTATUS(va_status, func) \
-       if (va_status != VA_STATUS_SUCCESS) { \
-               fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
-               abort(); \
+// Copies <height> rows of <width> bytes each out of a source buffer whose rows
+// start <src_pitch> bytes apart, into a tightly packed destination (row y lands
+// at dst + y * width). The visible call sites in decode_jpeg_vaapi() read from
+// a mapped VAImage plane (pitch taken from image.pitches[]) and write into the
+// packed PBO, so the pitch has to be applied to the source side; applying it to
+// the destination would read the wrong source rows and write outside the
+// destination plane whenever the driver pads its rows.
+//
+// TODO: Deduplicate between Nageru and this.
+// NOTE(review): if Nageru's copy pitches the destination instead, a shared
+// helper needs separate source and destination pitches -- confirm before merging.
+static void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t width, size_t src_pitch, size_t height)
+{
+       if (width == src_pitch) {
+               // No row padding, so the whole plane is one contiguous run.
+               memcpy(dst, src, width * height);
+       } else {
+               for (size_t y = 0; y < height; ++y) {
+                       memcpy(dst + y * width, src + y * src_pitch, width);
+               }
        }
+}
 
-#define CHECK_VASTATUS_RET(va_status, func) \
-       if (va_status != VA_STATUS_SUCCESS) { \
-               fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
-               return nullptr; \
-       }
+static unique_ptr<VADisplayWithCleanup> va_dpy;
+static unique_ptr<VAResourcePool> va_pool;
+
+bool vaapi_jpeg_decoding_usable = false;
 
 // From libjpeg (although it's of course identical between implementations).
 static const int jpeg_natural_order[DCTSIZE2] = {
@@ -65,163 +60,24 @@ static const int jpeg_natural_order[DCTSIZE2] = {
        53, 60, 61, 54, 47, 55, 62, 63,
 };
 
-VAResources get_va_resources(unsigned width, unsigned height)
-{
-       {
-               lock_guard<mutex> lock(va_resources_mutex);
-               for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
-                       if (it->width == width && it->height == height) {
-                               VAResources ret = *it;
-                               va_resources_freelist.erase(it);
-                               return ret;
-                       }
-               }
-       }
-
-       VAResources ret;
-
-       ret.width = width;
-       ret.height = height;
-
-       VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
-                                             width, height,
-                                             &ret.surface, 1, nullptr, 0);
-       CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-       va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
-       CHECK_VASTATUS(va_status, "vaCreateContext");
-
-       va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
-       CHECK_VASTATUS(va_status, "vaCreateImage");
-
-       return ret;
-}
-
-void release_va_resources(VAResources resources)
-{
-       lock_guard<mutex> lock(va_resources_mutex);
-       if (va_resources_freelist.size() > 10) {
-               auto it = va_resources_freelist.end();
-               --it;
-
-               VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
-               CHECK_VASTATUS(va_status, "vaDestroyImage");
-
-               va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
-               CHECK_VASTATUS(va_status, "vaDestroyContext");
-
-               va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
-               CHECK_VASTATUS(va_status, "vaDestroySurfaces");
-
-               va_resources_freelist.erase(it);
-       }
-
-       va_resources_freelist.push_front(resources);
-}
-
-// RAII wrapper to release VAResources on return (even on error).
-class ReleaseVAResources {
-public:
-       ReleaseVAResources(const VAResources &resources)
-               : resources(resources) {}
-       ~ReleaseVAResources()
-       {
-               if (!committed) {
-                       release_va_resources(resources);
-               }
-       }
-
-       void commit() { committed = true; }
-
-private:
-       const VAResources &resources;
-       bool committed = false;
-};
-
-VADisplayWithCleanup::~VADisplayWithCleanup()
+// Opens <va_display> through try_open_va(), asking for JPEG baseline decoding
+// (VAProfileJPEGBaseline / VAEntrypointVLD) with two format pairs:
+// 4:2:2 as VA_RT_FORMAT_YUV422 + VA_FOURCC_UYVY, and 4:2:0 as
+// VA_RT_FORMAT_YUV420 + VA_FOURCC_NV12.
+//
+// Returns the (null) result unchanged if try_open_va() fails. Otherwise, as a
+// side effect, replaces the file-level va_pool with a new VAResourcePool for
+// the opened display before returning the display.
+//
+// NOTE(review): get_usable_va_display() also calls this just to probe
+// displays, and every successful call replaces va_pool with a pool holding the
+// raw va_dpy->va_dpy handle -- confirm the pool is never used (or torn down)
+// after the display it was created from has been closed.
+static unique_ptr<VADisplayWithCleanup> try_open_va_mjpeg(const string &va_display)
 {
-       if (va_dpy != nullptr) {
-               vaTerminate(va_dpy);
-       }
-       if (x11_display != nullptr) {
-               XCloseDisplay(x11_display);
-       }
-       if (drm_fd != -1) {
-               close(drm_fd);
-       }
-}
-
-unique_ptr<VADisplayWithCleanup> va_open_display(const string &va_display)
-{
-       if (va_display.empty() || va_display[0] != '/') {  // An X display.
-               Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str());
-               if (x11_display == nullptr) {
-                       fprintf(stderr, "error: can't connect to X server!\n");
-                       return nullptr;
-               }
-
-               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
-               ret->x11_display = x11_display;
-               ret->va_dpy = vaGetDisplay(x11_display);
-               if (ret->va_dpy == nullptr) {
-                       return nullptr;
-               }
-               return ret;
-       } else {  // A DRM node on the filesystem (e.g. /dev/dri/renderD128).
-               int drm_fd = open(va_display.c_str(), O_RDWR);
-               if (drm_fd == -1) {
-                       perror(va_display.c_str());
-                       return nullptr;
-               }
-               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
-               ret->drm_fd = drm_fd;
-               ret->va_dpy = vaGetDisplayDRM(drm_fd);
-               if (ret->va_dpy == nullptr) {
-                       return nullptr;
-               }
-               return ret;
-       }
-}
-
-unique_ptr<VADisplayWithCleanup> try_open_va(const string &va_display, string *error)
-{
-       unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
+       // Handed to try_open_va() by pointer below; presumably filled in there for
+       // each format it accepts (TODO confirm in shared/va_display.h).
+       VAConfigID config_id_422, config_id_420;
+       VAImageFormat uyvy_format, nv12_format;
+
+       // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
+       unique_ptr<VADisplayWithCleanup> va_dpy =
+               try_open_va(va_display, { VAProfileJPEGBaseline }, VAEntrypointVLD,
+                       { { "4:2:2", VA_RT_FORMAT_YUV422, VA_FOURCC_UYVY, &config_id_422, &uyvy_format },
+                         { "4:2:0", VA_RT_FORMAT_YUV420, VA_FOURCC_NV12, &config_id_420, &nv12_format } },
+                       /*chosen_profile=*/nullptr, /*error=*/nullptr);
        if (va_dpy == nullptr) {
-               if (error)
-                       *error = "Opening VA display failed";
-               return nullptr;
-       }
-       int major_ver, minor_ver;
-       VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
-       if (va_status != VA_STATUS_SUCCESS) {
-               char buf[256];
-               snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
-               if (error != nullptr)
-                       *error = buf;
-               return nullptr;
-       }
-
-       int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy);
-       unique_ptr<VAEntrypoint[]> entrypoints(new VAEntrypoint[num_entrypoints]);
-       if (entrypoints == nullptr) {
-               if (error != nullptr)
-                       *error = "Failed to allocate memory for VA entry points";
-               return nullptr;
-       }
-
-       vaQueryConfigEntrypoints(va_dpy->va_dpy, VAProfileJPEGBaseline, entrypoints.get(), &num_entrypoints);
-       for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
-               if (entrypoints[slice_entrypoint] != VAEntrypointVLD) {
-                       continue;
-               }
-
-               // We found a usable decode, so return it.
                return va_dpy;
        }
 
-       if (error != nullptr)
-               *error = "Can't find VAEntrypointVLD for the JPEG profile";
-       return nullptr;
+       // Build the pool that decode_jpeg_vaapi() draws its VA resources from.
+       va_pool.reset(new VAResourcePool(va_dpy->va_dpy, uyvy_format, nv12_format, config_id_422, config_id_420, /*with_data_buffer=*/false));
+
+       return va_dpy;
 }
 
 string get_usable_va_display()
@@ -235,7 +91,7 @@ string get_usable_va_display()
        }
 
        // First try the default (ie., whatever $DISPLAY is set to).
-       unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va("", nullptr);
+       unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va_mjpeg("");
        if (va_dpy != nullptr) {
                if (need_env_reset) {
                        unsetenv("LIBVA_MESSAGING_LEVEL");
@@ -253,7 +109,7 @@ string get_usable_va_display()
        } else {
                for (size_t i = 0; i < g.gl_pathc; ++i) {
                        string path = g.gl_pathv[i];
-                       va_dpy = try_open_va(path, nullptr);
+                       va_dpy = try_open_va_mjpeg(path);
                        if (va_dpy != nullptr) {
                                fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
                                        path.c_str());
@@ -280,37 +136,11 @@ void init_jpeg_vaapi()
                return;
        }
 
-       va_dpy = try_open_va(dpy, nullptr);
+       va_dpy = try_open_va_mjpeg(dpy);
        if (va_dpy == nullptr) {
                return;
        }
 
-       VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
-
-       VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD,
-                                           &attr, 1, &config_id);
-       CHECK_VASTATUS(va_status, "vaCreateConfig");
-
-       int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
-       assert(num_formats > 0);
-
-       unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
-       va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
-       CHECK_VASTATUS(va_status, "vaQueryImageFormats");
-
-       bool found = false;
-       for (int i = 0; i < num_formats; ++i) {
-               // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
-               if (formats[i].fourcc == VA_FOURCC_UYVY) {
-                       memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat));
-                       found = true;
-                       break;
-               }
-       }
-       if (!found) {
-               return;
-       }
-
        fprintf(stderr, "VA-API JPEG decoding initialized.\n");
        vaapi_jpeg_decoding_usable = true;
 }
@@ -355,12 +185,22 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
                        dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
                return nullptr;
        }
-       if (dinfo.comp_info[0].h_samp_factor != 2 ||
-           dinfo.comp_info[1].h_samp_factor != 1 ||
-           dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor ||
-           dinfo.comp_info[2].h_samp_factor != 1 ||
-           dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
-               fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+
+       const bool is_422 =
+               dinfo.comp_info[0].h_samp_factor == 2 &&
+               dinfo.comp_info[1].h_samp_factor == 1 &&
+               dinfo.comp_info[1].v_samp_factor == dinfo.comp_info[0].v_samp_factor &&
+               dinfo.comp_info[2].h_samp_factor == 1 &&
+               dinfo.comp_info[2].v_samp_factor == dinfo.comp_info[0].v_samp_factor;
+       const bool is_420 =
+               dinfo.comp_info[0].h_samp_factor == 2 &&
+               dinfo.comp_info[0].v_samp_factor == 2 &&
+               dinfo.comp_info[1].h_samp_factor == 1 &&
+               dinfo.comp_info[1].v_samp_factor == 1 &&
+               dinfo.comp_info[2].h_samp_factor == 1 &&
+               dinfo.comp_info[2].v_samp_factor == 1;
+       if (!is_422 && !is_420) {
+               fprintf(stderr, "Not 4:2:2 or 4:2:0. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
                        dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
                        dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
                        dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
@@ -383,8 +223,8 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
        pic_param.color_space = 0;  // YUV.
        pic_param.rotation = VA_ROTATION_NONE;
 
-       VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height);
-       ReleaseVAResources release(resources);
+       VAResourcePool::VAResources resources = va_pool->get_va_resources(dinfo.image_width, dinfo.image_height, is_422 ? VA_FOURCC_UYVY : VA_FOURCC_NV12);
+       ReleaseVAResources release(va_pool.get(), resources);
 
        VABufferID pic_param_buffer;
        VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, resources.context, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
@@ -558,22 +398,34 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
        uint8_t *y_pix = pbo.ptr;
        uint8_t *cbcr_pix = pbo.ptr + cbcr_offset;
 
-       const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
-       if (resources.image.pitches[0] == dinfo.image_width * 2) {
-               memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
-       } else {
-               for (unsigned y = 0; y < dinfo.image_height; ++y) {
-                       memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
-                                          src + y * resources.image.pitches[0], dinfo.image_width * 2);
+       unsigned cbcr_width = dinfo.image_width / 2;
+       unsigned cbcr_height;
+       if (is_422) {
+               const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
+               if (resources.image.pitches[0] == dinfo.image_width * 2) {
+                       memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
+               } else {
+                       for (unsigned y = 0; y < dinfo.image_height; ++y) {
+                               memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
+                                                  src + y * resources.image.pitches[0], dinfo.image_width * 2);
+                       }
                }
+               cbcr_height = dinfo.image_height;
+       } else {
+               assert(is_420);
+               const uint8_t *src_y = (const uint8_t *)mapped + resources.image.offsets[0];
+               const uint8_t *src_cbcr = (const uint8_t *)mapped + resources.image.offsets[1];
+               memcpy_with_pitch(y_pix, src_y, dinfo.image_width, resources.image.pitches[0], dinfo.image_height);
+               memcpy_with_pitch(cbcr_pix, src_cbcr, dinfo.image_width, resources.image.pitches[1], dinfo.image_height / 2);
+               cbcr_height = dinfo.image_height / 2;
        }
 
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
        frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
-       frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
+       frame->cbcr = create_texture_2d(cbcr_width, cbcr_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + cbcr_width * cbcr_height * 2);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 
-       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2);
        glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
        pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        frame->uploaded_ui_thread = pbo.upload_done;
@@ -583,7 +435,7 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
        frame->width = dinfo.image_width;
        frame->height = dinfo.image_height;
        frame->chroma_subsampling_x = 2;
-       frame->chroma_subsampling_y = 1;
+       frame->chroma_subsampling_y = is_420 ? 2 : 1;
 
        if (dinfo.marker_list != nullptr &&
            dinfo.marker_list->marker == JPEG_APP0 + 1 &&