git.sesse.net Git - nageru/blobdiff - futatabi/jpeg_frame_view.cpp
Fix a dangling reference (found by GCC 14).
[nageru] / futatabi / jpeg_frame_view.cpp
index 283ff672f73d987473446e32c9fe59f5f06d9e8a..6ab19482fb93772956036c19175b6ecef36262e3 100644 (file)
@@ -1,7 +1,12 @@
 #include "jpeg_frame_view.h"
 
 #include "defs.h"
+#include "flags.h"
 #include "jpeg_destroyer.h"
+#include "jpeglib_error_wrapper.h"
+#include "pbo_pool.h"
+#include "shared/context.h"
+#include "shared/metrics.h"
 #include "shared/post_to_main_thread.h"
 #include "video_stream.h"
 #include "ycbcr_converter.h"
@@ -9,6 +14,7 @@
 #include <QMouseEvent>
 #include <QScreen>
 #include <atomic>
+#include <chrono>
 #include <condition_variable>
 #include <deque>
 #include <jpeglib.h>
 
 using namespace movit;
 using namespace std;
+using namespace std::chrono;
 
 namespace {
 
 // Just an arbitrary order for std::map.
-struct FrameOnDiskLexicalOrder
-{
-       bool operator() (const FrameOnDisk &a, const FrameOnDisk &b) const
+struct FrameOnDiskLexicalOrder {
+       bool operator()(const FrameOnDisk &a, const FrameOnDisk &b) const
        {
                if (a.pts != b.pts)
                        return a.pts < b.pts;
@@ -57,66 +63,71 @@ struct LRUFrame {
        size_t last_used;
 };
 
-struct PendingDecode {
-       JPEGFrameView *destination;
-
-       // For actual decodes (only if frame below is nullptr).
-       FrameOnDisk primary, secondary;
-       float fade_alpha;  // Irrelevant if secondary.stream_idx == -1.
-
-       // Already-decoded frames are also sent through PendingDecode,
-       // so that they get drawn in the right order. If frame is nullptr,
-       // it's a real decode.
-       shared_ptr<Frame> frame;
-};
+// There can be multiple JPEGFrameView instances, so make all the metrics static.
+once_flag jpeg_metrics_inited;
+atomic<int64_t> metric_jpeg_cache_used_bytes{ 0 };  // Same value as cache_bytes_used.
+atomic<int64_t> metric_jpeg_cache_limit_bytes{ size_t(CACHE_SIZE_MB) * 1024 * 1024 };
+atomic<int64_t> metric_jpeg_cache_given_up_frames{ 0 };
+atomic<int64_t> metric_jpeg_cache_hit_frames{ 0 };
+atomic<int64_t> metric_jpeg_cache_miss_frames{ 0 };
+atomic<int64_t> metric_jpeg_software_decode_frames{ 0 };
+atomic<int64_t> metric_jpeg_software_fail_frames{ 0 };
+atomic<int64_t> metric_jpeg_vaapi_decode_frames{ 0 };
+atomic<int64_t> metric_jpeg_vaapi_fail_frames{ 0 };
+atomic<int64_t> metric_jpeg_prepared_frames{ 0 };
+atomic<int64_t> metric_jpeg_displayed_frames{ 0 };
+Summary metric_jpeg_decode_time_seconds;
 
 }  // namespace
 
-thread JPEGFrameView::jpeg_decoder_thread;
 mutex cache_mu;
 map<FrameOnDisk, LRUFrame, FrameOnDiskLexicalOrder> cache;  // Under cache_mu.
 size_t cache_bytes_used = 0;  // Under cache_mu.
-condition_variable any_pending_decodes;
-deque<PendingDecode> pending_decodes;  // Under cache_mu.
-atomic<size_t> event_counter{0};
+atomic<size_t> event_counter{ 0 };
 extern QGLWidget *global_share_widget;
 extern atomic<bool> should_quit;
 
-shared_ptr<Frame> decode_jpeg(const string &filename)
+shared_ptr<Frame> decode_jpeg(const string &jpeg)
 {
+       steady_clock::time_point start = steady_clock::now();
        shared_ptr<Frame> frame;
        if (vaapi_jpeg_decoding_usable) {
-               frame = decode_jpeg_vaapi(filename);
+               frame = decode_jpeg_vaapi(jpeg);
                if (frame != nullptr) {
+                       ++metric_jpeg_vaapi_decode_frames;
+                       steady_clock::time_point stop = steady_clock::now();
+                       metric_jpeg_decode_time_seconds.count_event(duration<double>(stop - start).count());
                        return frame;
                }
                fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n");
+               ++metric_jpeg_vaapi_fail_frames;
        }
 
        frame.reset(new Frame);
 
        jpeg_decompress_struct dinfo;
-       jpeg_error_mgr jerr;
-       dinfo.err = jpeg_std_error(&jerr);
-       jpeg_create_decompress(&dinfo);
+       JPEGWrapErrorManager error_mgr(&dinfo);
+       if (!error_mgr.run([&dinfo] { jpeg_create_decompress(&dinfo); })) {
+               return get_black_frame();
+       }
        JPEGDestroyer destroy_dinfo(&dinfo);
 
-       FILE *fp = fopen(filename.c_str(), "rb");
-       if (fp == nullptr) {
-               perror(filename.c_str());
-               exit(1);
-       }
-       jpeg_stdio_src(&dinfo, fp);
+       jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF);
 
-       jpeg_read_header(&dinfo, true);
+       if (!error_mgr.run([&dinfo, &jpeg] {
+                   jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
+                   jpeg_read_header(&dinfo, true);
+           })) {
+               return get_black_frame();
+       }
 
        if (dinfo.num_components != 3) {
                fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
-                       dinfo.num_components,
-                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
-                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
-                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
-               exit(1);
+                       dinfo.num_components,
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               return get_black_frame();
        }
        if (dinfo.comp_info[0].h_samp_factor != dinfo.max_h_samp_factor ||
            dinfo.comp_info[0].v_samp_factor != dinfo.max_v_samp_factor ||  // Y' must not be subsampled.
@@ -125,14 +136,18 @@ shared_ptr<Frame> decode_jpeg(const string &filename)
            (dinfo.max_h_samp_factor % dinfo.comp_info[1].h_samp_factor) != 0 ||
            (dinfo.max_v_samp_factor % dinfo.comp_info[1].v_samp_factor) != 0) {  // No 2:3 subsampling or other weirdness.
                fprintf(stderr, "Unsupported subsampling scheme. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
-                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
-                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
-                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
-               exit(1);
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               abort();
        }
        dinfo.raw_data_out = true;
 
-       jpeg_start_decompress(&dinfo);
+       if (!error_mgr.run([&dinfo] {
+                   jpeg_start_decompress(&dinfo);
+           })) {
+               return get_black_frame();
+       }
 
        frame->width = dinfo.output_width;
        frame->height = dinfo.output_height;
@@ -146,32 +161,63 @@ shared_ptr<Frame> decode_jpeg(const string &filename)
 
        unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor;
        unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor;
-       unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor;
-       unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor;
-
-       // TODO: Decode into a PBO.
-       frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]);
-       frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->pitch_y = luma_width_blocks * DCTSIZE;
-       frame->pitch_chroma = chroma_width_blocks * DCTSIZE;
-
-       JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
-       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
-       for (unsigned y = 0; y < mcu_height_blocks; ++y) {
-               // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
-               for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
-                       yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y;
-                       cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
-                       crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
-               }
 
-               jpeg_read_raw_data(&dinfo, data, v_mcu_size);
+       PBO pbo = global_pbo_pool->alloc_pbo();
+       const size_t chroma_width = dinfo.image_width / frame->chroma_subsampling_x;
+       const size_t chroma_height = dinfo.image_height / frame->chroma_subsampling_y;
+       size_t cb_offset = dinfo.image_width * dinfo.image_height;
+       size_t cr_offset = cb_offset + chroma_width * chroma_height;
+       uint8_t *y_pix = pbo.ptr;
+       uint8_t *cb_pix = pbo.ptr + cb_offset;
+       uint8_t *cr_pix = pbo.ptr + cr_offset;
+       unsigned pitch_y = luma_width_blocks * DCTSIZE;
+       unsigned pitch_chroma = chroma_width_blocks * DCTSIZE;
+
+       if (dinfo.marker_list != nullptr &&
+           dinfo.marker_list->marker == JPEG_APP0 + 1 &&
+           dinfo.marker_list->data_length >= 4 &&
+           memcmp(dinfo.marker_list->data, "Exif", 4) == 0) {
+               frame->exif_data.assign(reinterpret_cast<char *>(dinfo.marker_list->data),
+                       dinfo.marker_list->data_length);
        }
 
-       (void)jpeg_finish_decompress(&dinfo);
-       fclose(fp);
+       if (!error_mgr.run([&dinfo, &y_pix, &cb_pix, &cr_pix, pitch_y, pitch_chroma, v_mcu_size, mcu_height_blocks] {
+                   JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
+                   JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+                   for (unsigned y = 0; y < mcu_height_blocks; ++y) {
+                           // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
+                           for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
+                                   yptr[yy] = y_pix + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * pitch_y;
+                                   cbptr[yy] = cb_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
+                                   crptr[yy] = cr_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
+                           }
+
+                           jpeg_read_raw_data(&dinfo, data, v_mcu_size);
+                   }
+
+                   (void)jpeg_finish_decompress(&dinfo);
+           })) {
+               return get_black_frame();
+       }
 
+       // FIXME: what about resolutions that are not divisible by the block factor?
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
+       frame->y = create_texture_2d(frame->width, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       frame->cb = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cb_offset));
+       frame->cr = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cr_offset));
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + chroma_width * chroma_height * 2);
+       glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
+       pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       glFlush();
+       frame->uploaded_ui_thread = pbo.upload_done;
+       frame->uploaded_interpolation = pbo.upload_done;
+       global_pbo_pool->release_pbo(move(pbo));
+
+       ++metric_jpeg_software_decode_frames;
+       steady_clock::time_point stop = steady_clock::now();
+       metric_jpeg_decode_time_seconds.count_event(duration<double>(stop - start).count());
        return frame;
 }
 
@@ -179,7 +225,8 @@ void prune_cache()
 {
        // Assumes cache_mu is held.
        int64_t bytes_still_to_remove = cache_bytes_used - (size_t(CACHE_SIZE_MB) * 1024 * 1024) * 9 / 10;
-       if (bytes_still_to_remove <= 0) return;
+       if (bytes_still_to_remove <= 0)
+               return;
 
        vector<pair<size_t, size_t>> lru_timestamps_and_size;
        for (const auto &key_and_value : cache) {
@@ -194,12 +241,14 @@ void prune_cache()
        for (const pair<size_t, size_t> &it : lru_timestamps_and_size) {
                lru_cutoff_point = it.first;
                bytes_still_to_remove -= it.second;
-               if (bytes_still_to_remove <= 0) break;
+               if (bytes_still_to_remove <= 0)
+                       break;
        }
 
-       for (auto it = cache.begin(); it != cache.end(); ) {
+       for (auto it = cache.begin(); it != cache.end();) {
                if (it->second.last_used <= lru_cutoff_point) {
                        cache_bytes_used -= frame_size(*it->second.frame);
+                       metric_jpeg_cache_used_bytes = cache_bytes_used;
                        it = cache.erase(it);
                } else {
                        ++it;
@@ -211,23 +260,28 @@ shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi
 {
        *did_decode = false;
        {
-               unique_lock<mutex> lock(cache_mu);
+               lock_guard<mutex> lock(cache_mu);
                auto it = cache.find(frame_spec);
                if (it != cache.end()) {
+                       ++metric_jpeg_cache_hit_frames;
                        it->second.last_used = event_counter++;
                        return it->second.frame;
                }
        }
 
        if (cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE) {
+               ++metric_jpeg_cache_given_up_frames;
                return nullptr;
        }
 
+       ++metric_jpeg_cache_miss_frames;
+
        *did_decode = true;
-       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec));
+       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video);
 
-       unique_lock<mutex> lock(cache_mu);
+       lock_guard<mutex> lock(cache_mu);
        cache_bytes_used += frame_size(*frame);
+       metric_jpeg_cache_used_bytes = cache_bytes_used;
        cache[frame_spec] = LRUFrame{ frame, event_counter++ };
 
        if (cache_bytes_used > size_t(CACHE_SIZE_MB) * 1024 * 1024) {
@@ -241,12 +295,19 @@ void JPEGFrameView::jpeg_decoder_thread_func()
        size_t num_decoded = 0, num_dropped = 0;
 
        pthread_setname_np(pthread_self(), "JPEGDecoder");
+       QSurface *surface = create_surface();
+       QOpenGLContext *context = create_context(surface);
+       bool ok = make_current(context, surface);
+       if (!ok) {
+               fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
+               abort();
+       }
        while (!should_quit.load()) {
                PendingDecode decode;
                CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE;
                {
                        unique_lock<mutex> lock(cache_mu);  // TODO: Perhaps under another lock?
-                       any_pending_decodes.wait(lock, [] {
+                       any_pending_decodes.wait(lock, [this] {
                                return !pending_decodes.empty() || should_quit.load();
                        });
                        if (should_quit.load())
@@ -254,20 +315,14 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                        decode = pending_decodes.front();
                        pending_decodes.pop_front();
 
-                       size_t num_pending = 0;
-                       for (const PendingDecode &other_decode : pending_decodes) {
-                               if (other_decode.destination == decode.destination) {
-                                       ++num_pending;
-                               }
-                       }
-                       if (num_pending > 3) {
+                       if (pending_decodes.size() > 3) {
                                cache_miss_behavior = RETURN_NULLPTR_IF_NOT_IN_CACHE;
                        }
                }
 
                if (decode.frame != nullptr) {
                        // Already decoded, so just show it.
-                       decode.destination->setDecodedFrame(decode.frame, nullptr, 1.0f);
+                       setDecodedFrame(decode.frame, nullptr, 1.0f);
                        continue;
                }
 
@@ -281,7 +336,7 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                        }
 
                        bool found_in_cache;
-                       shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &decode.destination->frame_reader, &found_in_cache);
+                       shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &frame_reader, &found_in_cache);
 
                        if (frame == nullptr) {
                                assert(cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE);
@@ -293,7 +348,7 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                                ++num_decoded;
                                if (num_decoded % 1000 == 0) {
                                        fprintf(stderr, "Decoded %zu images, dropped %zu (%.2f%% dropped)\n",
-                                               num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped));
+                                               num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped));
                                }
                        }
                        if (subframe_idx == 0) {
@@ -308,11 +363,11 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                }
 
                // TODO: Could we get jitter between non-interpolated and interpolated frames here?
-               decode.destination->setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
+               setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
        }
 }
 
-void JPEGFrameView::shutdown()
+JPEGFrameView::~JPEGFrameView()
 {
        any_pending_decodes.notify_all();
        jpeg_decoder_thread.join();
@@ -321,45 +376,57 @@ void JPEGFrameView::shutdown()
 JPEGFrameView::JPEGFrameView(QWidget *parent)
        : QGLWidget(parent, global_share_widget)
 {
+       call_once(jpeg_metrics_inited, [] {  // The metrics are static and shared by all views, so register them only once.
+               global_metrics.add("jpeg_cache_used_bytes", &metric_jpeg_cache_used_bytes, Metrics::TYPE_GAUGE);
+               global_metrics.add("jpeg_cache_limit_bytes", &metric_jpeg_cache_limit_bytes, Metrics::TYPE_GAUGE);
+               global_metrics.add("jpeg_cache_frames", { { "action", "given_up" } }, &metric_jpeg_cache_given_up_frames);
+               global_metrics.add("jpeg_cache_frames", { { "action", "hit" } }, &metric_jpeg_cache_hit_frames);
+               global_metrics.add("jpeg_cache_frames", { { "action", "miss" } }, &metric_jpeg_cache_miss_frames);
+               global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "decode" } }, &metric_jpeg_software_decode_frames);
+               global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "fail" } }, &metric_jpeg_software_fail_frames);
+               global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "decode" } }, &metric_jpeg_vaapi_decode_frames);
+               global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "fail" } }, &metric_jpeg_vaapi_fail_frames);
+               global_metrics.add("jpeg_frames", { { "action", "prepared" } }, &metric_jpeg_prepared_frames);
+               global_metrics.add("jpeg_frames", { { "action", "displayed" } }, &metric_jpeg_displayed_frames);
+               vector<double> quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 };
+               metric_jpeg_decode_time_seconds.init(quantiles, 60.0);  // NOTE(review): 60.0 is presumably the summary window in seconds -- confirm against Summary::init.
+               global_metrics.add("jpeg_decode_time_seconds", &metric_jpeg_decode_time_seconds);
+       });
 }
 
 void JPEGFrameView::setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk secondary_frame, float fade_alpha)
 {
        current_stream_idx = stream_idx;  // TODO: Does this interact with fades?
 
-       unique_lock<mutex> lock(cache_mu);
+       lock_guard<mutex> lock(cache_mu);
        PendingDecode decode;
        decode.primary = frame;
        decode.secondary = secondary_frame;
        decode.fade_alpha = fade_alpha;
-       decode.destination = this;
        pending_decodes.push_back(decode);
        any_pending_decodes.notify_all();
 }
 
 void JPEGFrameView::setFrame(shared_ptr<Frame> frame)
 {
-       unique_lock<mutex> lock(cache_mu);
+       lock_guard<mutex> lock(cache_mu);
        PendingDecode decode;
        decode.frame = std::move(frame);
-       decode.destination = this;
+       decode.fade_alpha = 0.0f;
        pending_decodes.push_back(decode);
        any_pending_decodes.notify_all();
 }
 
-ResourcePool *resource_pool = nullptr;
-
 void JPEGFrameView::initializeGL()
 {
+       init_pbo_pool();
+
        glDisable(GL_BLEND);
        glDisable(GL_DEPTH_TEST);
        check_error();
 
-       static once_flag once;
-       call_once(once, [] {
-               resource_pool = new ResourcePool;
-               jpeg_decoder_thread = std::thread(jpeg_decoder_thread_func);
-       });
+       resource_pool = new ResourcePool;
+       jpeg_decoder_thread = std::thread(&JPEGFrameView::jpeg_decoder_thread_func, this);
 
        ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_RGBA, resource_pool));
 
@@ -394,6 +461,19 @@ void JPEGFrameView::paintGL()
                return;
        }
 
+       if (!displayed_this_frame) {
+               ++metric_jpeg_displayed_frames;
+               displayed_this_frame = true;
+       }
+       if (current_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_frame->uploaded_ui_thread.reset();
+       }
+       if (current_secondary_frame != nullptr && current_secondary_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_secondary_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_secondary_frame->uploaded_ui_thread.reset();
+       }
+
        check_error();
        current_chain->render_to_screen();
 
@@ -402,6 +482,7 @@ void JPEGFrameView::paintGL()
                        overlay_input->set_width(overlay_width);
                        overlay_input->set_height(overlay_height);
                        overlay_input->set_pixel_data(overlay_image->bits());
+                       overlay_input_needs_refresh = false;
                }
                glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height);
                overlay_chain->render_to_screen();
@@ -423,6 +504,8 @@ void JPEGFrameView::setDecodedFrame(shared_ptr<Frame> frame, shared_ptr<Frame> s
                } else {
                        current_chain = ycbcr_converter->prepare_chain_for_conversion(frame);
                }
+               ++metric_jpeg_prepared_frames;
+               displayed_this_frame = false;
                update();
        });
 }
@@ -441,10 +524,25 @@ void JPEGFrameView::set_overlay(const string &text)
                return;
        }
 
+       // Figure out how large the texture needs to be.
+       {
+               QImage img(overlay_width, overlay_height, QImage::Format_Grayscale8);
+               QPainter painter(&img);
+               QFont font = painter.font();
+               font.setPointSize(12);
+               QFontMetrics metrics(font);
+               overlay_base_width = lrint(metrics.boundingRect(QString::fromStdString(text)).width() + 8.0);
+               overlay_base_height = lrint(metrics.height());
+       }
+
        float dpr = QGuiApplication::primaryScreen()->devicePixelRatio();
        overlay_width = lrint(overlay_base_width * dpr);
        overlay_height = lrint(overlay_base_height * dpr);
 
+       // Work around OpenGL alignment issues.
+       while (overlay_width % 4 != 0) ++overlay_width;
+
+       // Now do the actual drawing.
        overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8));
        overlay_image->setDevicePixelRatio(dpr);
        overlay_image->fill(0);
@@ -460,3 +558,24 @@ void JPEGFrameView::set_overlay(const string &text)
        // Don't refresh immediately; we might not have an OpenGL context here.
        overlay_input_needs_refresh = true;
 }
+
+// Returns the shared 1x1 placeholder frame handed out when a JPEG cannot be decoded; every call counts as a software decode failure.
+shared_ptr<Frame> get_black_frame()
+{
+       static shared_ptr<Frame> black_frame;
+       static once_flag flag;
+       call_once(flag, [] {
+               // Not really black, but whatever. :-)
+               uint8_t black[] = { 0, 0, 0, 255 };
+               RefCountedTexture black_tex = create_texture_2d(1, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, black);
+               black_frame.reset(new Frame);  // The static pointer starts out null; allocate it before filling in the fields.
+               black_frame->y = black_tex;
+               black_frame->cb = black_tex;
+               black_frame->cr = move(black_tex);
+               black_frame->width = black_frame->height = 1;
+               black_frame->chroma_subsampling_x = 1;
+               black_frame->chroma_subsampling_y = 1;
+       });
+       ++metric_jpeg_software_fail_frames;
+       return black_frame;
+}