git.sesse.net Git - nageru/blobdiff - futatabi/vaapi_jpeg_decoder.cpp
Change Futatabi frames to be cached as textures instead of in system memory.
[nageru] / futatabi / vaapi_jpeg_decoder.cpp
index 12db78b194a52075ca8fc578d5af6589698e2df7..758d974bfeb1fc4e0e6e9b595bef779f5f12bfa3 100644 (file)
@@ -2,7 +2,9 @@
 
 #include "jpeg_destroyer.h"
 #include "jpeg_frame.h"
-#include "memcpy_interleaved.h"
+#include "jpeglib_error_wrapper.h"
+#include "pbo_pool.h"
+#include "shared/memcpy_interleaved.h"
 
 #include <X11/Xlib.h>
 #include <assert.h>
@@ -21,6 +23,8 @@
 #include <va/va_drm.h>
 #include <va/va_x11.h>
 
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))  // Expresses byte offset i as a char * (null base plus i); passed below as the data argument of create_texture_2d() while a GL_PIXEL_UNPACK_BUFFER is bound.
+
 using namespace std;
 
 static unique_ptr<VADisplayWithCleanup> va_dpy;
@@ -37,17 +41,17 @@ struct VAResources {
 static list<VAResources> va_resources_freelist;
 static mutex va_resources_mutex;
 
-#define CHECK_VASTATUS(va_status, func)                                 \
-    if (va_status != VA_STATUS_SUCCESS) {                               \
-        fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
-        exit(1);                                                        \
-    }
+#define CHECK_VASTATUS(va_status, func) /* Fatal check: if va_status is not VA_STATUS_SUCCESS, print the calling function, line, the name given in func and the status code to stderr, then abort(). */ \
+       if (va_status != VA_STATUS_SUCCESS) { \
+               fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+               abort(); \
+       }
 
-#define CHECK_VASTATUS_RET(va_status, func)                             \
-    if (va_status != VA_STATUS_SUCCESS) {                               \
-        fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
-        return nullptr;                                                 \
-    }
+#define CHECK_VASTATUS_RET(va_status, func) /* Non-fatal check: prints the same diagnostic as CHECK_VASTATUS, but makes the enclosing function return nullptr instead of aborting, so it is only usable where nullptr is a valid return value. */ \
+       if (va_status != VA_STATUS_SUCCESS) { \
+               fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+               return nullptr; \
+       }
 
 // From libjpeg (although it's of course identical between implementations).
 static const int jpeg_natural_order[DCTSIZE2] = {
@@ -80,8 +84,8 @@ VAResources get_va_resources(unsigned width, unsigned height)
        ret.height = height;
 
        VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
-               width, height,
-               &ret.surface, 1, nullptr, 0);
+                                             width, height,
+                                             &ret.surface, 1, nullptr, 0);
        CHECK_VASTATUS(va_status, "vaCreateSurfaces");
 
        va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
@@ -245,14 +249,14 @@ string get_usable_va_display()
        glob_t g;
        int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
        if (err != 0) {
-               fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
+               fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
        } else {
                for (size_t i = 0; i < g.gl_pathc; ++i) {
                        string path = g.gl_pathv[i];
                        va_dpy = try_open_va(path, nullptr);
                        if (va_dpy != nullptr) {
                                fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
-                                       path.c_str());
+                                       path.c_str());
                                globfree(&g);
                                if (need_env_reset) {
                                        unsetenv("LIBVA_MESSAGING_LEVEL");
@@ -284,7 +288,7 @@ void init_jpeg_vaapi()
        VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
 
        VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD,
-               &attr, 1, &config_id);
+                                           &attr, 1, &config_id);
        CHECK_VASTATUS(va_status, "vaCreateConfig");
 
        int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
@@ -316,7 +320,8 @@ public:
        VABufferDestroyer(VADisplay dpy, VABufferID buf)
                : dpy(dpy), buf(buf) {}
 
-       ~VABufferDestroyer() {
+       ~VABufferDestroyer()  // Calls vaDestroyBuffer(dpy, buf) on destruction; a failure status goes through CHECK_VASTATUS.
+       {
                VAStatus va_status = vaDestroyBuffer(dpy, buf);
                CHECK_VASTATUS(va_status, "vaDestroyBuffer");
        }
@@ -329,20 +334,25 @@ private:
 shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
 {
        jpeg_decompress_struct dinfo;
-       jpeg_error_mgr jerr;
-       dinfo.err = jpeg_std_error(&jerr);
-       jpeg_create_decompress(&dinfo);
+       JPEGWrapErrorManager error_mgr(&dinfo);
+       if (!error_mgr.run([&dinfo] { jpeg_create_decompress(&dinfo); })) {
+               return nullptr;
+       }
        JPEGDestroyer destroy_dinfo(&dinfo);
 
+       jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF);
+
        jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
-       jpeg_read_header(&dinfo, true);
+       if (!error_mgr.run([&dinfo] { jpeg_read_header(&dinfo, true); })) {
+               return nullptr;
+       }
 
        if (dinfo.num_components != 3) {
                fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
-                       dinfo.num_components,
-                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
-                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
-                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+                       dinfo.num_components,
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
                return nullptr;
        }
        if (dinfo.comp_info[0].h_samp_factor != 2 ||
@@ -351,9 +361,9 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
            dinfo.comp_info[2].h_samp_factor != 1 ||
            dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
                fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
-                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
-                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
-                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
                return nullptr;
        }
 
@@ -542,24 +552,46 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
 #else
        // Convert Y'CbCr to separate Y' and CbCr.
        frame->is_semiplanar = true;
-       frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
-       frame->cbcr.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+
+       PBO pbo = global_pbo_pool->alloc_pbo();
+       size_t cbcr_offset = dinfo.image_width * dinfo.image_height;
+       uint8_t *y_pix = pbo.ptr;
+       uint8_t *cbcr_pix = pbo.ptr + cbcr_offset;
+
        const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
        if (resources.image.pitches[0] == dinfo.image_width * 2) {
-               memcpy_interleaved(frame->cbcr.get(), frame->y.get(), src, dinfo.image_width * dinfo.image_height * 2);
+               memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
        } else {
                for (unsigned y = 0; y < dinfo.image_height; ++y) {
-                       memcpy_interleaved(frame->cbcr.get() + y * dinfo.image_width, frame->y.get() + y * dinfo.image_width,
+                       memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
                                           src + y * resources.image.pitches[0], dinfo.image_width * 2);
                }
        }
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
+       frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2);
+       glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
+       pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       frame->uploaded_ui_thread = pbo.upload_done;
+       frame->uploaded_interpolation = pbo.upload_done;
+       global_pbo_pool->release_pbo(move(pbo));
 #endif
        frame->width = dinfo.image_width;
        frame->height = dinfo.image_height;
        frame->chroma_subsampling_x = 2;
        frame->chroma_subsampling_y = 1;
-       frame->pitch_y = dinfo.image_width;
-       frame->pitch_chroma = dinfo.image_width / 2;
+
+       if (dinfo.marker_list != nullptr &&
+           dinfo.marker_list->marker == JPEG_APP0 + 1 &&
+           dinfo.marker_list->data_length >= 4 &&
+           memcmp(dinfo.marker_list->data, "Exif", 4) == 0) {
+               frame->exif_data.assign(reinterpret_cast<char *>(dinfo.marker_list->data),
+                       dinfo.marker_list->data_length);
+       }
 
        va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
        CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");