git.sesse.net Git - nageru/blobdiff - futatabi/jpeg_frame_view.cpp
Fix a dangling reference (found by GCC 14).
[nageru] / futatabi / jpeg_frame_view.cpp
index c9b8090544b8313f7cf769812580fa4e11e0fd39..6ab19482fb93772956036c19175b6ecef36262e3 100644 (file)
@@ -4,6 +4,8 @@
 #include "flags.h"
 #include "jpeg_destroyer.h"
 #include "jpeglib_error_wrapper.h"
+#include "pbo_pool.h"
+#include "shared/context.h"
 #include "shared/metrics.h"
 #include "shared/post_to_main_thread.h"
 #include "video_stream.h"
@@ -12,6 +14,7 @@
 #include <QMouseEvent>
 #include <QScreen>
 #include <atomic>
+#include <chrono>
 #include <condition_variable>
 #include <deque>
 #include <jpeglib.h>
@@ -29,6 +32,7 @@
 
 using namespace movit;
 using namespace std;
+using namespace std::chrono;
 
 namespace {
 
@@ -59,19 +63,6 @@ struct LRUFrame {
        size_t last_used;
 };
 
-struct PendingDecode {
-       JPEGFrameView *destination;
-
-       // For actual decodes (only if frame below is nullptr).
-       FrameOnDisk primary, secondary;
-       float fade_alpha;  // Irrelevant if secondary.stream_idx == -1.
-
-       // Already-decoded frames are also sent through PendingDecode,
-       // so that they get drawn in the right order. If frame is nullptr,
-       // it's a real decode.
-       shared_ptr<Frame> frame;
-};
-
 // There can be multiple JPEGFrameView instances, so make all the metrics static.
 once_flag jpeg_metrics_inited;
 atomic<int64_t> metric_jpeg_cache_used_bytes{ 0 };  // Same value as cache_bytes_used.
@@ -83,26 +74,29 @@ atomic<int64_t> metric_jpeg_software_decode_frames{ 0 };
 atomic<int64_t> metric_jpeg_software_fail_frames{ 0 };
 atomic<int64_t> metric_jpeg_vaapi_decode_frames{ 0 };
 atomic<int64_t> metric_jpeg_vaapi_fail_frames{ 0 };
+atomic<int64_t> metric_jpeg_prepared_frames{ 0 };
+atomic<int64_t> metric_jpeg_displayed_frames{ 0 };
+Summary metric_jpeg_decode_time_seconds;
 
 }  // namespace
 
-thread JPEGFrameView::jpeg_decoder_thread;
 mutex cache_mu;
 map<FrameOnDisk, LRUFrame, FrameOnDiskLexicalOrder> cache;  // Under cache_mu.
 size_t cache_bytes_used = 0;  // Under cache_mu.
-condition_variable any_pending_decodes;
-deque<PendingDecode> pending_decodes;  // Under cache_mu.
 atomic<size_t> event_counter{ 0 };
 extern QGLWidget *global_share_widget;
 extern atomic<bool> should_quit;
 
 shared_ptr<Frame> decode_jpeg(const string &jpeg)
 {
+       steady_clock::time_point start = steady_clock::now();
        shared_ptr<Frame> frame;
        if (vaapi_jpeg_decoding_usable) {
                frame = decode_jpeg_vaapi(jpeg);
                if (frame != nullptr) {
                        ++metric_jpeg_vaapi_decode_frames;
+                       steady_clock::time_point stop = steady_clock::now();
+                       metric_jpeg_decode_time_seconds.count_event(duration<double>(stop - start).count());
                        return frame;
                }
                fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n");
@@ -118,6 +112,8 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
        }
        JPEGDestroyer destroy_dinfo(&dinfo);
 
+       jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF);
+
        if (!error_mgr.run([&dinfo, &jpeg] {
                    jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
                    jpeg_read_header(&dinfo, true);
@@ -143,7 +139,7 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
                        dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
                        dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
                        dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
-               exit(1);
+               abort();
        }
        dinfo.raw_data_out = true;
 
@@ -165,25 +161,35 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
 
        unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor;
        unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor;
-       unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor;
-       unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor;
 
-       // TODO: Decode into a PBO.
-       frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]);
-       frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
-       frame->pitch_y = luma_width_blocks * DCTSIZE;
-       frame->pitch_chroma = chroma_width_blocks * DCTSIZE;
+       PBO pbo = global_pbo_pool->alloc_pbo();
+       const size_t chroma_width = dinfo.image_width / frame->chroma_subsampling_x;
+       const size_t chroma_height = dinfo.image_height / frame->chroma_subsampling_y;
+       size_t cb_offset = dinfo.image_width * dinfo.image_height;
+       size_t cr_offset = cb_offset + chroma_width * chroma_height;
+       uint8_t *y_pix = pbo.ptr;
+       uint8_t *cb_pix = pbo.ptr + cb_offset;
+       uint8_t *cr_pix = pbo.ptr + cr_offset;
+       unsigned pitch_y = luma_width_blocks * DCTSIZE;
+       unsigned pitch_chroma = chroma_width_blocks * DCTSIZE;
+
+       if (dinfo.marker_list != nullptr &&
+           dinfo.marker_list->marker == JPEG_APP0 + 1 &&
+           dinfo.marker_list->data_length >= 4 &&
+           memcmp(dinfo.marker_list->data, "Exif", 4) == 0) {
+               frame->exif_data.assign(reinterpret_cast<char *>(dinfo.marker_list->data),
+                       dinfo.marker_list->data_length);
+       }
 
-       if (!error_mgr.run([&dinfo, &frame, v_mcu_size, mcu_height_blocks] {
+       if (!error_mgr.run([&dinfo, &y_pix, &cb_pix, &cr_pix, pitch_y, pitch_chroma, v_mcu_size, mcu_height_blocks] {
                    JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
                    JSAMPARRAY data[3] = { yptr, cbptr, crptr };
                    for (unsigned y = 0; y < mcu_height_blocks; ++y) {
                            // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
                            for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
-                                   yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y;
-                                   cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
-                                   crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+                                   yptr[yy] = y_pix + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * pitch_y;
+                                   cbptr[yy] = cb_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
+                                   crptr[yy] = cr_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma;
                            }
 
                            jpeg_read_raw_data(&dinfo, data, v_mcu_size);
@@ -194,7 +200,24 @@ shared_ptr<Frame> decode_jpeg(const string &jpeg)
                return get_black_frame();
        }
 
+       // FIXME: what about resolutions that are not divisible by the block factor?
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo);
+       frame->y = create_texture_2d(frame->width, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
+       frame->cb = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cb_offset));
+       frame->cr = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cr_offset));
+       glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+       glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + chroma_width * chroma_height * 2);
+       glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
+       pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       glFlush();
+       frame->uploaded_ui_thread = pbo.upload_done;
+       frame->uploaded_interpolation = pbo.upload_done;
+       global_pbo_pool->release_pbo(move(pbo));
+
        ++metric_jpeg_software_decode_frames;
+       steady_clock::time_point stop = steady_clock::now();
+       metric_jpeg_decode_time_seconds.count_event(duration<double>(stop - start).count());
        return frame;
 }
 
@@ -254,7 +277,7 @@ shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi
        ++metric_jpeg_cache_miss_frames;
 
        *did_decode = true;
-       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec));
+       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video);
 
        lock_guard<mutex> lock(cache_mu);
        cache_bytes_used += frame_size(*frame);
@@ -272,12 +295,19 @@ void JPEGFrameView::jpeg_decoder_thread_func()
        size_t num_decoded = 0, num_dropped = 0;
 
        pthread_setname_np(pthread_self(), "JPEGDecoder");
+       QSurface *surface = create_surface();
+       QOpenGLContext *context = create_context(surface);
+       bool ok = make_current(context, surface);
+       if (!ok) {
+               fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
+               abort();
+       }
        while (!should_quit.load()) {
                PendingDecode decode;
                CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE;
                {
                        unique_lock<mutex> lock(cache_mu);  // TODO: Perhaps under another lock?
-                       any_pending_decodes.wait(lock, [] {
+                       any_pending_decodes.wait(lock, [this] {
                                return !pending_decodes.empty() || should_quit.load();
                        });
                        if (should_quit.load())
@@ -285,20 +315,14 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                        decode = pending_decodes.front();
                        pending_decodes.pop_front();
 
-                       size_t num_pending = 0;
-                       for (const PendingDecode &other_decode : pending_decodes) {
-                               if (other_decode.destination == decode.destination) {
-                                       ++num_pending;
-                               }
-                       }
-                       if (num_pending > 3) {
+                       if (pending_decodes.size() > 3) {
                                cache_miss_behavior = RETURN_NULLPTR_IF_NOT_IN_CACHE;
                        }
                }
 
                if (decode.frame != nullptr) {
                        // Already decoded, so just show it.
-                       decode.destination->setDecodedFrame(decode.frame, nullptr, 1.0f);
+                       setDecodedFrame(decode.frame, nullptr, 1.0f);
                        continue;
                }
 
@@ -312,7 +336,7 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                        }
 
                        bool found_in_cache;
-                       shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &decode.destination->frame_reader, &found_in_cache);
+                       shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &frame_reader, &found_in_cache);
 
                        if (frame == nullptr) {
                                assert(cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE);
@@ -339,11 +363,11 @@ void JPEGFrameView::jpeg_decoder_thread_func()
                }
 
                // TODO: Could we get jitter between non-interpolated and interpolated frames here?
-               decode.destination->setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
+               setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
        }
 }
 
-void JPEGFrameView::shutdown()
+JPEGFrameView::~JPEGFrameView()
 {
        any_pending_decodes.notify_all();
        jpeg_decoder_thread.join();
@@ -362,6 +386,11 @@ JPEGFrameView::JPEGFrameView(QWidget *parent)
                global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "fail" } }, &metric_jpeg_software_fail_frames);
                global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "decode" } }, &metric_jpeg_vaapi_decode_frames);
                global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "fail" } }, &metric_jpeg_vaapi_fail_frames);
+               global_metrics.add("jpeg_frames", { { "action", "prepared" } }, &metric_jpeg_prepared_frames);
+               global_metrics.add("jpeg_frames", { { "action", "displayed" } }, &metric_jpeg_displayed_frames);
+               vector<double> quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 };
+               metric_jpeg_decode_time_seconds.init(quantiles, 60.0);
+               global_metrics.add("jpeg_decode_time_seconds", &metric_jpeg_decode_time_seconds);
        });
 }
 
@@ -374,7 +403,6 @@ void JPEGFrameView::setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk
        decode.primary = frame;
        decode.secondary = secondary_frame;
        decode.fade_alpha = fade_alpha;
-       decode.destination = this;
        pending_decodes.push_back(decode);
        any_pending_decodes.notify_all();
 }
@@ -384,24 +412,21 @@ void JPEGFrameView::setFrame(shared_ptr<Frame> frame)
        lock_guard<mutex> lock(cache_mu);
        PendingDecode decode;
        decode.frame = std::move(frame);
-       decode.destination = this;
+       decode.fade_alpha = 0.0f;
        pending_decodes.push_back(decode);
        any_pending_decodes.notify_all();
 }
 
-ResourcePool *resource_pool = nullptr;
-
 void JPEGFrameView::initializeGL()
 {
+       init_pbo_pool();
+
        glDisable(GL_BLEND);
        glDisable(GL_DEPTH_TEST);
        check_error();
 
-       static once_flag once;
-       call_once(once, [] {
-               resource_pool = new ResourcePool;
-               jpeg_decoder_thread = std::thread(jpeg_decoder_thread_func);
-       });
+       resource_pool = new ResourcePool;
+       jpeg_decoder_thread = std::thread(&JPEGFrameView::jpeg_decoder_thread_func, this);
 
        ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_RGBA, resource_pool));
 
@@ -436,6 +461,19 @@ void JPEGFrameView::paintGL()
                return;
        }
 
+       if (!displayed_this_frame) {
+               ++metric_jpeg_displayed_frames;
+               displayed_this_frame = true;
+       }
+       if (current_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_frame->uploaded_ui_thread.reset();
+       }
+       if (current_secondary_frame != nullptr && current_secondary_frame->uploaded_ui_thread != nullptr) {
+               glWaitSync(current_secondary_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               current_secondary_frame->uploaded_ui_thread.reset();
+       }
+
        check_error();
        current_chain->render_to_screen();
 
@@ -444,6 +482,7 @@ void JPEGFrameView::paintGL()
                        overlay_input->set_width(overlay_width);
                        overlay_input->set_height(overlay_height);
                        overlay_input->set_pixel_data(overlay_image->bits());
+                       overlay_input_needs_refresh = false;
                }
                glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height);
                overlay_chain->render_to_screen();
@@ -465,6 +504,8 @@ void JPEGFrameView::setDecodedFrame(shared_ptr<Frame> frame, shared_ptr<Frame> s
                } else {
                        current_chain = ycbcr_converter->prepare_chain_for_conversion(frame);
                }
+               ++metric_jpeg_prepared_frames;
+               displayed_this_frame = false;
                update();
        });
 }
@@ -483,10 +524,25 @@ void JPEGFrameView::set_overlay(const string &text)
                return;
        }
 
+       // Figure out how large the texture needs to be.
+       {
+               QImage img(overlay_width, overlay_height, QImage::Format_Grayscale8);
+               QPainter painter(&img);
+               QFont font = painter.font();
+               font.setPointSize(12);
+               QFontMetrics metrics(font);
+               overlay_base_width = lrint(metrics.boundingRect(QString::fromStdString(text)).width() + 8.0);
+               overlay_base_height = lrint(metrics.height());
+       }
+
        float dpr = QGuiApplication::primaryScreen()->devicePixelRatio();
        overlay_width = lrint(overlay_base_width * dpr);
        overlay_height = lrint(overlay_base_height * dpr);
 
+       // Work around OpenGL alignment issues.
+       while (overlay_width % 4 != 0) ++overlay_width;
+
+       // Now do the actual drawing.
        overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8));
        overlay_image->setDevicePixelRatio(dpr);
        overlay_image->fill(0);
@@ -508,16 +564,17 @@ shared_ptr<Frame> get_black_frame()
        static shared_ptr<Frame> black_frame;
        static once_flag flag;
        call_once(flag, [] {
-               black_frame.reset(new Frame);
-               black_frame->y.reset(new uint8_t[global_flags.width * global_flags.height]);
-               black_frame->cb.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]);
-               black_frame->cr.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]);
-               black_frame->width = global_flags.width;
-               black_frame->height = global_flags.height;
-               black_frame->chroma_subsampling_x = 2;
-               black_frame->chroma_subsampling_y = 2;
-               black_frame->pitch_y = global_flags.width;
-               black_frame->pitch_chroma = global_flags.width / 2;
+               // Not really black, but whatever. :-)
+               uint8_t black[] = { 0, 0, 0, 255 };
+               RefCountedTexture black_tex = create_texture_2d(1, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, black);
+
+               black_frame->y = black_tex;
+               black_frame->cb = black_tex;
+               black_frame->cr = move(black_tex);
+               black_frame->width = 1;
+               black_frame->height = 1;
+               black_frame->chroma_subsampling_x = 1;
+               black_frame->chroma_subsampling_y = 1;
        });
        ++metric_jpeg_software_fail_frames;
        return black_frame;