git.sesse.net Git - nageru/commitdiff
Change Futatabi frames to be cached as textures instead of in system memory.
author Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 8 Mar 2020 14:57:51 +0000 (15:57 +0100)
committer Steinar H. Gunderson <steinar+nageru@gunderson.no>
Sun, 8 Mar 2020 14:57:51 +0000 (15:57 +0100)
The JPEGs are now decoded into PBO bounce buffers, which saves a lot of CPU
time (copying is asynchronous, and done by the GPU -- plus we save a copy
into a staging buffer).

Similarly, keeping the cache in textures allows the driver (if it wants!)
to keep it in VRAM, saving repeated uploading if the same frame is used
multiple times.

CPU usage is down from 1.05 to 0.60 cores on my machine, when not playing.
More importantly, the 99th-percentile player queue status is very much
better.

15 files changed:
futatabi/jpeg_frame.h
futatabi/jpeg_frame_view.cpp
futatabi/jpeg_frame_view.h
futatabi/mainwindow.cpp
futatabi/pbo_pool.cpp [new file with mode: 0644]
futatabi/pbo_pool.h [new file with mode: 0644]
futatabi/vaapi_jpeg_decoder.cpp
futatabi/video_stream.cpp
futatabi/ycbcr_converter.cpp
meson.build
nageru/image_input.cpp
nageru/image_input.h
shared/meson.build
shared/ref_counted_texture.cpp [new file with mode: 0644]
shared/ref_counted_texture.h

index 6fd0d4b9d05cfb685495ae8e86287b61efb09f4f..5e94cbbdc58acf70612d3b4a302cc12b9df12fc5 100644 (file)
@@ -4,15 +4,19 @@
 #include <memory>
 #include <string>
 
+#include "shared/ref_counted_gl_sync.h"
+#include "shared/ref_counted_texture.h"
+
 struct Frame {
        bool is_semiplanar = false;
-       std::unique_ptr<uint8_t[]> y;
-       std::unique_ptr<uint8_t[]> cb, cr;  // For planar.
-       std::unique_ptr<uint8_t[]> cbcr;  // For semiplanar.
+       RefCountedTexture y;
+       RefCountedTexture cb, cr;  // For planar.
+       RefCountedTexture cbcr;  // For semiplanar.
        unsigned width, height;
        unsigned chroma_subsampling_x, chroma_subsampling_y;
-       unsigned pitch_y, pitch_chroma;
        std::string exif_data;
+       RefCountedGLsync uploaded_ui_thread;
+       RefCountedGLsync uploaded_interpolation;
 };
 
 #endif  // !defined(_JPEG_FRAME_H)
index ebcf509dbc91b470d18377e973d97874617093a0..85c708d4d20f9aff8f137d0d3573b489d9a368f5 100644 (file)
@@ -4,6 +4,8 @@
 #include "flags.h"
 #include "jpeg_destroyer.h"
 #include "jpeglib_error_wrapper.h"
+#include "pbo_pool.h"
+#include "shared/context.h"
 #include "shared/metrics.h"
 #include "shared/post_to_main_thread.h"
 #include "video_stream.h"
@@ -159,15 +161,15 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
 
        unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor;
        unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor;
-       unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor;
-       unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor;
 
-       // TODO: Decode into a PBO.
-       frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]);
-       frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->pitch_y = luma_width_blocks * DCTSIZE;
-       frame->pitch_chroma = chroma_width_blocks * DCTSIZE;
+       PBO pbo = global_pbo_pool->alloc_pbo();
+       size_t cb_offset = dinfo.image_width * dinfo.image_height;
+       size_t cr_offset = cb_offset + (dinfo.image_width / 2) * dinfo.image_height;
+       uint8_t *y_pix = pbo.ptr;
+       uint8_t *cb_pix = pbo.ptr + cb_offset;
+       uint8_t *cr_pix = pbo.ptr + cr_offset;
+       unsigned pitch_y = luma_width_blocks * DCTSIZE;
+       unsigned pitch_chroma = chroma_width_blocks * DCTSIZE * 2;
 
        if (dinfo.marker_list != nullptr &&
            dinfo.marker_list->marker == JPEG_APP0 + 1 &&
@@ -177,15 +179,15 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
                        dinfo.marker_list->data_length);
        }
 
-       if (!error_mgr.run([&dinfo, &frame, v_mcu_size, mcu_height_blocks] {
+       if (!error_mgr.run([&dinfo, &y_pix, &cb_pix, &cr_pix, pitch_y, pitch_chroma, v_mcu_size, mcu_height_blocks] {
                    JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
                    JSAMPARRAY data[3] = { yptr, cbptr, crptr };
                    for (unsigned y = 0; y < mcu_height_blocks; ++y) {
                            // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
                            for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
-                                   yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y;
-                                   cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
-                                   crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+                                   yptr[yy] = y_pix + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * pitch_y;
+                                   cbptr[yy] = cb_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
+                                   crptr[yy] = cr_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
                            }
 
                            jpeg_read_raw_data(&dinfo, data, v_mcu_size);
@@ -196,6 +198,20 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
                return get_black_frame();
        }
 
+       // FIXME: what about resolutions that are not divisible by the block factor?
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
+       frame->y = create_texture_2d(frame->width, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       frame->cb = create_texture_2d(frame->width / 2, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cb_offset));
+       frame->cr = create_texture_2d(frame->width / 2, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cr_offset));
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2);
+       glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
+       pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       frame->uploaded_ui_thread = pbo.upload_done;
+       frame->uploaded_interpolation = pbo.upload_done;
+       global_pbo_pool->release_pbo(move(pbo));
+
        ++metric_jpeg_software_decode_frames;
        steady_clock::time_point stop = steady_clock::now();
        metric_jpeg_decode_time_seconds.count_event(duration<double>(stop - start).count());
@@ -276,6 +292,13 @@ void JPEGFrameView::jpeg_decoder_thread_func()
        size_t num_decoded = 0, num_dropped = 0;
 
        pthread_setname_np(pthread_self(), "JPEGDecoder");
+       QSurface *surface = create_surface();
+       QOpenGLContext *context = create_context(surface);
+       bool ok = make_current(context, surface);
+       if (!ok) {
+               fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
+               abort();
+       }
        while (!should_quit.load()) {
                PendingDecode decode;
                CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE;
@@ -392,6 +415,8 @@ void JPEGFrameView::setFrame(shared_ptr<Frame> frame)
 
 void JPEGFrameView::initializeGL()
 {
+       init_pbo_pool();
+
        glDisable(GL_BLEND);
        glDisable(GL_DEPTH_TEST);
        check_error();
@@ -436,6 +461,14 @@ void JPEGFrameView::paintGL()
                ++metric_jpeg_displayed_frames;
                displayed_this_frame = true;
        }
+       if (current_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_frame->uploaded_ui_thread.reset();
+       }
+       if (current_secondary_frame != nullptr && current_secondary_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_secondary_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_secondary_frame->uploaded_ui_thread.reset();
+       }
 
        check_error();
        current_chain->render_to_screen();
@@ -527,16 +560,17 @@ shared_ptr<Frame> get_black_frame()
        static shared_ptr<Frame> black_frame;
        static once_flag flag;
        call_once(flag, [] {
-               black_frame.reset(new Frame);
-               black_frame->y.reset(new uint8_t[global_flags.width * global_flags.height]);
-               black_frame->cb.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]);
-               black_frame->cr.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]);
-               black_frame->width = global_flags.width;
-               black_frame->height = global_flags.height;
-               black_frame->chroma_subsampling_x = 2;
-               black_frame->chroma_subsampling_y = 2;
-               black_frame->pitch_y = global_flags.width;
-               black_frame->pitch_chroma = global_flags.width / 2;
+               // Not really black, but whatever. :-)
+               uint8_t black[] = { 0, 0, 0, 255 };
+               RefCountedTexture black_tex = create_texture_2d(1, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, black);
+
+               black_frame->y = black_tex;
+               black_frame->cb = black_tex;
+               black_frame->cr = move(black_tex);
+               black_frame->width = 1;
+               black_frame->height = 1;
+               black_frame->chroma_subsampling_x = 1;
+               black_frame->chroma_subsampling_y = 1;
        });
        ++metric_jpeg_software_fail_frames;
        return black_frame;
index 3f92e4c708e602819696bd57fadafd54efb61b2b..693ea9b6d0a4c5f58fd3f4afafc7ca1ccf484847 100644 (file)
@@ -63,7 +63,7 @@ private:
        movit::EffectChain *current_chain = nullptr;  // Owned by ycbcr_converter.
 
        bool displayed_this_frame = false;  // Owned by the UI frame.
-       std::shared_ptr<Frame> current_frame;  // So that we hold on to the pixels.
+       std::shared_ptr<Frame> current_frame;  // So that we hold on to the textures.
        std::shared_ptr<Frame> current_secondary_frame;  // Same.
 
        int overlay_base_width = 16, overlay_base_height = 16;
index 49581e53caeb134d180b1bba5546a8083fec05fa..0bc11d810e0c8e411d65a655987f387874b37a07 100644 (file)
@@ -7,6 +7,7 @@
 #include "player.h"
 #include "futatabi_midi_mapping.pb.h"
 #include "midi_mapping_dialog.h"
+#include "pbo_pool.h"
 #include "shared/aboutdialog.h"
 #include "shared/disk_space_estimator.h"
 #include "shared/post_to_main_thread.h"
diff --git a/futatabi/pbo_pool.cpp b/futatabi/pbo_pool.cpp
new file mode 100644 (file)
index 0000000..1933b31
--- /dev/null
@@ -0,0 +1,79 @@
+#include "pbo_pool.h"
+
+#include <chrono>
+#include <mutex>
+
+#include <movit/util.h>
+
+using namespace std;
+using namespace std::chrono;
+
+once_flag global_pbo_pool_inited;  // Lets init_pbo_pool() construct the pool at most once.
+PBOPool *global_pbo_pool = nullptr;  // Null until init_pbo_pool() has run; nothing in this file deletes it.
+
+// Creates the global PBO pool on the first call; later calls do nothing.
+// The PBOPool constructor issues GL calls (through create_pbo()), so
+// presumably a GL context must be current here -- TODO confirm with callers.
+void init_pbo_pool()
+{
+       call_once(global_pbo_pool_inited, [] { global_pbo_pool = new PBOPool; });
+}
+
+// Stores the buffer parameters and fills the freelist with num_pbos
+// freshly created, persistently mapped PBOs of pbo_size bytes each.
+PBOPool::PBOPool(size_t pbo_size, size_t num_pbos, GLenum buffer, GLenum permissions, GLenum map_bits)
+       : pbo_size(pbo_size), buffer(buffer), permissions(permissions), map_bits(map_bits)
+{
+       // No locking here: the freelist mutex is not taken while constructing.
+       for (size_t remaining = num_pbos; remaining > 0; --remaining) {
+               freelist.push(create_pbo());
+       }
+}
+
+PBO PBOPool::alloc_pbo()
+{
+       PBO pbo;
+       bool found_pbo = false;
+       {
+               lock_guard<mutex> lock(freelist_mutex);
+               if (!freelist.empty()) {
+                       pbo = move(freelist.front());
+                       freelist.pop();
+                       found_pbo = true;
+               }
+       }
+
+       if (!found_pbo) {
+               fprintf(stderr, "WARNING: Out of PBOs for texture upload, creating a new one\n");
+               pbo = create_pbo();
+       }
+       if (pbo.upload_done != nullptr) {
+               if (glClientWaitSync(pbo.upload_done.get(), 0, 0) == GL_TIMEOUT_EXPIRED) {
+                       steady_clock::time_point start = steady_clock::now();
+                       glClientWaitSync(pbo.upload_done.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+                       steady_clock::time_point stop = steady_clock::now();
+
+                       fprintf(stderr, "WARNING: PBO was not ready after previous upload, had to wait %.1f ms before reusing\n",
+                               1e3 * duration<double>(stop - start).count());
+               }
+               pbo.upload_done.reset();
+       }
+
+       return pbo;
+}
+
+// Puts a PBO at the back of the freelist. If the PBO may still be read by
+// the GPU, the caller sets pbo.upload_done first; alloc_pbo() waits on it.
+void PBOPool::release_pbo(PBO pbo)
+{
+       lock_guard<mutex> guard(freelist_mutex);
+       freelist.emplace(move(pbo));
+}
+
+PBO PBOPool::create_pbo()
+{
+       PBO pbo;
+       
+       glCreateBuffers(1, &pbo.pbo);
+       check_error();
+       glNamedBufferStorage(pbo.pbo, pbo_size, nullptr, permissions | GL_MAP_PERSISTENT_BIT);
+       check_error();
+        pbo.ptr = (uint8_t *)glMapNamedBufferRange(pbo.pbo, 0, pbo_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT);
+       check_error();
+
+       return pbo;
+}
diff --git a/futatabi/pbo_pool.h b/futatabi/pbo_pool.h
new file mode 100644 (file)
index 0000000..20c61e4
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef _PBO_POOL_H
+#define _PBO_POOL_H 1
+
+// Keeps a pool of persistently mapped PBOs around that can be used as staging
+// buffers for texture uploads. (Uploading from a PBO is asynchronous and done
+// by the GPU, so assuming we don't need an extra copy into the PBO, this is a
+// significant win over uploading from regular malloc-ed RAM.)
+//
+// Unlike Nageru's PBOFrameAllocator, these are not connected to
+// a given frame, since we can have thousands of frames in the cache
+// at any given time. Thus, we need to have separate fences for each PBO
+// to know that the upload is done.
+
+#include <mutex>
+#include <queue>
+
+#include <epoxy/gl.h>
+
+#include "shared/ref_counted_gl_sync.h"
+
+// One pooled pixel buffer object together with its persistent mapping.
+// Default member initializers keep a default-constructed PBO (as used in
+// PBOPool::alloc_pbo() and create_pbo()) from holding indeterminate values.
+struct PBO {
+       GLuint pbo = 0;  // GL buffer object name; 0 means no buffer has been created yet.
+       uint8_t *ptr = nullptr;  // Mapped memory.
+       RefCountedGLsync upload_done;  // Fence for the last upload from this PBO; null if none is pending.
+};
+
+class PBOPool {
+public:
+       PBOPool(size_t pbo_size = 8 << 20,  // 8 MB, large enough for 1080p 4:2:2.
+                size_t num_pbos = 8,  // Number of PBOs the constructor creates up front.
+                GLenum buffer = GL_PIXEL_UNPACK_BUFFER_ARB,
+                GLenum permissions = GL_MAP_WRITE_BIT,
+                GLenum map_bits = GL_MAP_FLUSH_EXPLICIT_BIT);
+
+       PBO alloc_pbo();  // Takes a PBO from the freelist (creating one if it is empty) and waits for its upload_done fence, if set.
+       void release_pbo(PBO pbo);  // Set a fence on upload_done if the PBO may still be in use.
+
+private:
+       PBO create_pbo();  // Creates and persistently maps one buffer of pbo_size bytes.
+
+       std::mutex freelist_mutex;  // Held by alloc_pbo() and release_pbo() while they touch freelist.
+       std::queue<PBO> freelist;  // PBOs available for reuse, oldest first.
+
+       size_t pbo_size;  // Size in bytes of each buffer's storage and mapping.
+       GLenum buffer, permissions, map_bits;  // NOTE(review): buffer is stored but not read anywhere in pbo_pool.cpp.
+};
+
+extern PBOPool *global_pbo_pool;
+void init_pbo_pool();  // Idempotent.
+
+#endif  // !defined(_PBO_POOL_H)
index f34654d508504434e81fbcc816e12b3a8aba95a5..758d974bfeb1fc4e0e6e9b595bef779f5f12bfa3 100644 (file)
@@ -3,6 +3,7 @@
 #include "jpeg_destroyer.h"
 #include "jpeg_frame.h"
 #include "jpeglib_error_wrapper.h"
+#include "pbo_pool.h"
 #include "shared/memcpy_interleaved.h"
 
 #include <X11/Xlib.h>
@@ -22,6 +23,8 @@
 #include <va/va_drm.h>
 #include <va/va_x11.h>
 
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
 using namespace std;
 
 static unique_ptr<VADisplayWithCleanup> va_dpy;
@@ -549,24 +552,38 @@ shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
 #else
        // Convert Y'CbCr to separate Y' and CbCr.
        frame->is_semiplanar = true;
-       frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
-       frame->cbcr.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+
+       PBO pbo = global_pbo_pool->alloc_pbo();
+       size_t cbcr_offset = dinfo.image_width * dinfo.image_height;
+       uint8_t *y_pix = pbo.ptr;
+       uint8_t *cbcr_pix = pbo.ptr + cbcr_offset;
+
        const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
        if (resources.image.pitches[0] == dinfo.image_width * 2) {
-               memcpy_interleaved(frame->cbcr.get(), frame->y.get(), src, dinfo.image_width * dinfo.image_height * 2);
+               memcpy_interleaved(cbcr_pix, y_pix, src, dinfo.image_width * dinfo.image_height * 2);
        } else {
                for (unsigned y = 0; y < dinfo.image_height; ++y) {
-                       memcpy_interleaved(frame->cbcr.get() + y * dinfo.image_width, frame->y.get() + y * dinfo.image_width,
+                       memcpy_interleaved(cbcr_pix + y * dinfo.image_width, y_pix + y * dinfo.image_width,
                                           src + y * resources.image.pitches[0], dinfo.image_width * 2);
                }
        }
+
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
+       frame->y = create_texture_2d(dinfo.image_width, dinfo.image_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       frame->cbcr = create_texture_2d(dinfo.image_width / 2, dinfo.image_height, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cbcr_offset));
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height * 2);
+       glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
+       pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       frame->uploaded_ui_thread = pbo.upload_done;
+       frame->uploaded_interpolation = pbo.upload_done;
+       global_pbo_pool->release_pbo(move(pbo));
 #endif
        frame->width = dinfo.image_width;
        frame->height = dinfo.image_height;
        frame->chroma_subsampling_x = 2;
        frame->chroma_subsampling_y = 1;
-       frame->pitch_y = dinfo.image_width;
-       frame->pitch_chroma = dinfo.image_width / 2;
 
        if (dinfo.marker_list != nullptr &&
            dinfo.marker_list->marker == JPEG_APP0 + 1 &&
index 5a36801b5e6ebf787cc5f84874650d63db2862fb..591ee7e219883e5f4eb9209cbb17e5100108ea83 100644 (file)
@@ -11,6 +11,7 @@ extern "C" {
 #include "flow.h"
 #include "jpeg_frame_view.h"
 #include "movit/util.h"
+#include "pbo_pool.h"
 #include "player.h"
 #include "shared/context.h"
 #include "shared/httpd.h"
@@ -37,6 +38,14 @@ Summary metric_interpolation_latency_seconds;
 Summary metric_fade_fence_wait_time_seconds;
 Summary metric_interpolation_fence_wait_time_seconds;
 
+// If the frame still has a pending interpolation-side upload fence, queues a
+// wait on it with glWaitSync() and then drops the fence so that later calls
+// for the same frame skip the wait.
+void wait_for_upload(shared_ptr<Frame> &frame)
+{
+       if (frame->uploaded_interpolation == nullptr) {
+               // Nothing pending (or already waited for).
+               return;
+       }
+       glWaitSync(frame->uploaded_interpolation.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+       frame->uploaded_interpolation.reset();
+}
+
 }  // namespace
 
 extern HTTPD *global_httpd;
@@ -152,6 +161,16 @@ string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t
        return move(dest.dest);
 }
 
+string encode_jpeg_from_pbo(void *contents, unsigned width, unsigned height, const string exif_data)
+{
+       unsigned chroma_width = width / 2;
+
+       const uint8_t *y = (const uint8_t *)contents;
+       const uint8_t *cb = (const uint8_t *)contents + width * height;
+       const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;
+       return encode_jpeg(y, cb, cr, width, height, move(exif_data));
+}
+
 VideoStream::VideoStream(AVFormatContext *file_avctx)
        : avctx(file_avctx), output_fast_forward(file_avctx != nullptr)
 {
@@ -430,6 +449,8 @@ void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64
 
        shared_ptr<Frame> frame1 = decode_jpeg_with_cache(frame1_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
        shared_ptr<Frame> frame2 = decode_jpeg_with_cache(frame2_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+       wait_for_upload(frame1);
+       wait_for_upload(frame2);
 
        ycbcr_semiplanar_converter->prepare_chain_for_fade(frame1, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);
 
@@ -517,6 +538,7 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
                FrameOnDisk frame_spec = frame_no == 1 ? frame2 : frame1;
                bool did_decode;
                shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+               wait_for_upload(frame);
                ycbcr_converter->prepare_chain_for_conversion(frame)->render_to_fbo(resources->input_fbos[frame_no], global_flags.width, global_flags.height);
                if (frame_no == 1) {
                        qf.exif_data = frame->exif_data;  // Use the white point from the last frame.
@@ -557,6 +579,7 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
                // Now decode the image we are fading against.
                bool did_decode;
                shared_ptr<Frame> frame2 = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+               wait_for_upload(frame2);
 
                // Then fade against it, putting it into the fade Y' and CbCr textures.
                RGBTriplet neutral_color = get_neutral_color(qf.exif_data);
@@ -645,31 +668,27 @@ void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t o
 
 namespace {
 
-shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
+RefCountedTexture clone_r8_texture(GLuint src_tex, unsigned width, unsigned height)
 {
-       size_t chroma_width = width / 2;
-
-       const uint8_t *y = (const uint8_t *)contents;
-       const uint8_t *cb = (const uint8_t *)contents + width * height;
-       const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;
+       GLuint tex;
+       glCreateTextures(GL_TEXTURE_2D, 1, &tex);
+       check_error();
+       glTextureStorage2D(tex, 1, GL_R8, width, height);
+       check_error();
+       glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
+                          tex, GL_TEXTURE_2D, 0, 0, 0, 0,
+                          width, height, 1);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+       check_error();
 
-       shared_ptr<Frame> frame(new Frame);
-       frame->y.reset(new uint8_t[width * height]);
-       frame->cb.reset(new uint8_t[chroma_width * height]);
-       frame->cr.reset(new uint8_t[chroma_width * height]);
-       for (unsigned yy = 0; yy < height; ++yy) {
-               memcpy(frame->y.get() + width * yy, y + width * yy, width);
-               memcpy(frame->cb.get() + chroma_width * yy, cb + chroma_width * yy, chroma_width);
-               memcpy(frame->cr.get() + chroma_width * yy, cr + chroma_width * yy, chroma_width);
-       }
-       frame->is_semiplanar = false;
-       frame->width = width;
-       frame->height = height;
-       frame->chroma_subsampling_x = 2;
-       frame->chroma_subsampling_y = 1;
-       frame->pitch_y = width;
-       frame->pitch_chroma = chroma_width;
-       return frame;
+       return RefCountedTexture(new GLuint(tex), TextureDeleter());
 }
 
 }  // namespace
@@ -685,6 +704,8 @@ void VideoStream::encode_thread_func()
                abort();
        }
 
+       init_pbo_pool();
+
        while (!should_quit) {
                QueuedFrame qf;
                {
@@ -751,11 +772,8 @@ void VideoStream::encode_thread_func()
                        metric_fade_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
                        metric_fade_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
-                       shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
-                       assert(frame->exif_data.empty());
-
                        // Now JPEG encode it, and send it on to the stream.
-                       string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height, /*exif_data=*/"");
+                       string jpeg = encode_jpeg_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height, /*exif_data=*/"");
 
                        AVPacket pkt;
                        av_init_packet(&pkt);
@@ -775,13 +793,25 @@ void VideoStream::encode_thread_func()
                        metric_interpolation_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
                        // Send it on to display.
-                       shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
                        if (qf.display_decoded_func != nullptr) {
-                               qf.display_decoded_func(frame);
+                               shared_ptr<Frame> frame(new Frame);
+                               if (qf.type == QueuedFrame::FADED_INTERPOLATED) {
+                                       frame->y = clone_r8_texture(qf.resources->fade_y_output_tex, global_flags.width, global_flags.height);
+                               } else {
+                                       frame->y = clone_r8_texture(qf.output_tex, global_flags.width, global_flags.height);
+                               }
+                               frame->cb = clone_r8_texture(qf.resources->cb_tex, global_flags.width / 2, global_flags.height);
+                               frame->cr = clone_r8_texture(qf.resources->cr_tex, global_flags.width / 2, global_flags.height);
+                               frame->width = global_flags.width;
+                               frame->height = global_flags.height;
+                               frame->chroma_subsampling_x = 2;
+                               frame->chroma_subsampling_y = 1;
+                               frame->uploaded_ui_thread = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+                               qf.display_decoded_func(move(frame));
                        }
 
                        // Now JPEG encode it, and send it on to the stream.
-                       string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height, move(qf.exif_data));
+                       string jpeg = encode_jpeg_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height, move(qf.exif_data));
                        if (qf.flow_tex != 0) {
                                compute_flow->release_texture(qf.flow_tex);
                        }
index 2d2f32f6077e72ae4cd5894634227b853161bfd6..0edd7e66151e3b7856d918725f87913e4f309df0 100644 (file)
@@ -194,15 +194,11 @@ void setup_input_for_frame(shared_ptr<Frame> frame, const YCbCrFormat &ycbcr_for
 
        input->set_width(frame->width);
        input->set_height(frame->height);
-       input->set_pixel_data(0, frame->y.get());
-       input->set_pitch(0, frame->pitch_y);
+       input->set_texture_num(0, *frame->y);
        if (frame->is_semiplanar) {
-               input->set_pixel_data(1, frame->cbcr.get());
-               input->set_pitch(1, frame->pitch_chroma);
+               input->set_texture_num(1, *frame->cbcr);
        } else {
-               input->set_pixel_data(1, frame->cb.get());
-               input->set_pixel_data(2, frame->cr.get());
-               input->set_pitch(1, frame->pitch_chroma);
-               input->set_pitch(2, frame->pitch_chroma);
+               input->set_texture_num(1, *frame->cb);
+               input->set_texture_num(2, *frame->cr);
        }
 }
index d705c7fcb61feb819a1494ba44ae0dd15721f0f5..76b4b59c27410fd3d5dfadff653bf39e1d9a246e 100644 (file)
@@ -290,7 +290,7 @@ futatabi_srcs += ['futatabi/main.cpp', 'futatabi/player.cpp', 'futatabi/video_st
 futatabi_srcs += ['futatabi/vaapi_jpeg_decoder.cpp', 'futatabi/db.cpp', 'futatabi/ycbcr_converter.cpp', 'futatabi/flags.cpp']
 futatabi_srcs += ['futatabi/mainwindow.cpp', 'futatabi/jpeg_frame_view.cpp', 'futatabi/clip_list.cpp', 'futatabi/frame_on_disk.cpp']
 futatabi_srcs += ['futatabi/export.cpp', 'futatabi/midi_mapper.cpp', 'futatabi/midi_mapping_dialog.cpp']
-futatabi_srcs += ['futatabi/exif_parser.cpp']
+futatabi_srcs += ['futatabi/exif_parser.cpp', 'futatabi/pbo_pool.cpp']
 futatabi_srcs += moc_files
 futatabi_srcs += proto_generated
 
index 6a2c5abc846b913123ef00647373bd6267ee1d4b..afb87c57a6aa32577ee9473f9c87be89c6b70058 100644 (file)
@@ -228,7 +228,7 @@ shared_ptr<const ImageInput::Image> ImageInput::load_image_raw(const string &pat
        glBindTexture(GL_TEXTURE_2D, 0);
        check_error();
 
-       shared_ptr<Image> image(new Image{unsigned(frame->width), unsigned(frame->height), RefCountedTexture(new GLuint(tex)), last_modified});
+       shared_ptr<Image> image(new Image{unsigned(frame->width), unsigned(frame->height), UniqueTexture(new GLuint(tex)), last_modified});
        return image;
 }
 
index babf5f529316df31c7b1d2bd2cd4e7f44534b3e8..7b712ab13ec9920ac53fcf5a956fb349022b3ec0 100644 (file)
@@ -27,7 +27,7 @@ public:
        // NOTE: You will need to call start_update_thread() yourself, once per program.
        struct Image {
                unsigned width, height;
-               RefCountedTexture tex;
+               UniqueTexture tex;
                timespec last_modified;
        };
        static std::shared_ptr<const Image> load_image(const std::string &filename, const std::string &pathname);
index 5653528521b290e12c944a0640c75beb54cdb469..c7ef06c410ea32464c91ecb21f45d24c268499b3 100644 (file)
@@ -3,6 +3,7 @@ shared_qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGL']
 libmicrohttpddep = dependency('libmicrohttpd')
 protobufdep = dependency('protobuf')
 alsadep = dependency('alsa')
+movitdep = dependency('movit')
 
 # Preprocess Qt as needed.
 qt_files = qt5.preprocess(
@@ -18,14 +19,14 @@ proto_generated = gen.process(['midi_mapping.proto'])
 protobuf_lib = static_library('protobufs', proto_generated, dependencies: [protobufdep])
 protobuf_hdrs = declare_dependency(sources: proto_generated)
 
-srcs = ['memcpy_interleaved.cpp', 'metacube2.cpp', 'ffmpeg_raii.cpp', 'mux.cpp', 'metrics.cpp', 'context.cpp', 'httpd.cpp', 'disk_space_estimator.cpp', 'read_file.cpp', 'text_proto.cpp', 'midi_device.cpp']
+srcs = ['memcpy_interleaved.cpp', 'metacube2.cpp', 'ffmpeg_raii.cpp', 'mux.cpp', 'metrics.cpp', 'context.cpp', 'httpd.cpp', 'disk_space_estimator.cpp', 'read_file.cpp', 'text_proto.cpp', 'midi_device.cpp', 'ref_counted_texture.cpp']
 srcs += proto_generated
 
 # Qt objects.
 srcs += qt_files
 srcs += ['aboutdialog.cpp']
 
-shared = static_library('shared', srcs, include_directories: top_include, dependencies: [shared_qt5deps, libmicrohttpddep, protobufdep, alsadep])
+shared = static_library('shared', srcs, include_directories: top_include, dependencies: [shared_qt5deps, libmicrohttpddep, protobufdep, alsadep, movitdep])
 shareddep = declare_dependency(
    sources: proto_generated,
    include_directories: top_include,
diff --git a/shared/ref_counted_texture.cpp b/shared/ref_counted_texture.cpp
new file mode 100644 (file)
index 0000000..d10b0dc
--- /dev/null
@@ -0,0 +1,25 @@
+#include "ref_counted_texture.h"
+
+#include <epoxy/gl.h>
+#include <movit/util.h>
+
+RefCountedTexture create_texture_2d(GLuint width, GLuint height, GLenum internal_format, GLenum format, GLenum type, const GLvoid *pixels)
+{
+       GLuint tex;
+       glCreateTextures(GL_TEXTURE_2D, 1, &tex);
+       check_error();
+       glTextureStorage2D(tex, 1, internal_format, width, height);
+       check_error();
+       glTextureSubImage2D(tex, 0, 0, 0, width, height, format, type, pixels);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+       check_error();
+
+       return RefCountedTexture(new GLuint(tex), TextureDeleter());
+}
index 20d0e5abae0eba26678ed8d4a07cad1f1d85a7d3..240bf869c62cba3d5ae55fdf837025832844943a 100644 (file)
@@ -14,6 +14,10 @@ struct TextureDeleter {
        }
 };
 
-typedef std::unique_ptr<GLuint, TextureDeleter> RefCountedTexture;
+typedef std::unique_ptr<GLuint, TextureDeleter> UniqueTexture;
+typedef std::shared_ptr<GLuint> RefCountedTexture;
+
+// TODO: consider mipmaps.
+RefCountedTexture create_texture_2d(GLuint width, GLuint height, GLenum internal_format, GLenum format, GLenum type, const GLvoid *pixels);
 
 #endif  // !defined(_REF_COUNTED_TEXTURE)