X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=futatabi%2Fjpeg_frame_view.cpp;h=ebcf509dbc91b470d18377e973d97874617093a0;hb=02ea864dc5a6dde7450c497581ff18d784ab832c;hp=1924a543ff017346930f0c2d8c605253555e5728;hpb=b44bf7cfce6a5aaffbcd1e37df39068a163438ad;p=nageru diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp index 1924a54..ebcf509 100644 --- a/futatabi/jpeg_frame_view.cpp +++ b/futatabi/jpeg_frame_view.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -29,13 +30,13 @@ using namespace movit; using namespace std; +using namespace std::chrono; namespace { // Just an arbitrary order for std::map. -struct FrameOnDiskLexicalOrder -{ - bool operator() (const FrameOnDisk &a, const FrameOnDisk &b) const +struct FrameOnDiskLexicalOrder { + bool operator()(const FrameOnDisk &a, const FrameOnDisk &b) const { if (a.pts != b.pts) return a.pts < b.pts; @@ -60,50 +61,40 @@ struct LRUFrame { size_t last_used; }; -struct PendingDecode { - JPEGFrameView *destination; - - // For actual decodes (only if frame below is nullptr). - FrameOnDisk primary, secondary; - float fade_alpha; // Irrelevant if secondary.stream_idx == -1. - - // Already-decoded frames are also sent through PendingDecode, - // so that they get drawn in the right order. If frame is nullptr, - // it's a real decode. - shared_ptr frame; -}; - // There can be multiple JPEGFrameView instances, so make all the metrics static. once_flag jpeg_metrics_inited; -atomic metric_jpeg_cache_used_bytes{0}; // Same value as cache_bytes_used. -atomic metric_jpeg_cache_limit_bytes{size_t(CACHE_SIZE_MB) * 1024 * 1024}; -atomic metric_jpeg_cache_given_up_frames{0}; -atomic metric_jpeg_cache_hit_frames{0}; -atomic metric_jpeg_cache_miss_frames{0}; -atomic metric_jpeg_software_decode_frames{0}; -atomic metric_jpeg_software_fail_frames{0}; -atomic metric_jpeg_vaapi_decode_frames{0}; -atomic metric_jpeg_vaapi_fail_frames{0}; +atomic metric_jpeg_cache_used_bytes{ 0 }; // Same value as cache_bytes_used. +atomic metric_jpeg_cache_limit_bytes{ size_t(CACHE_SIZE_MB) * 1024 * 1024 }; +atomic metric_jpeg_cache_given_up_frames{ 0 }; +atomic metric_jpeg_cache_hit_frames{ 0 }; +atomic metric_jpeg_cache_miss_frames{ 0 }; +atomic metric_jpeg_software_decode_frames{ 0 }; +atomic metric_jpeg_software_fail_frames{ 0 }; +atomic metric_jpeg_vaapi_decode_frames{ 0 }; +atomic metric_jpeg_vaapi_fail_frames{ 0 }; +atomic metric_jpeg_prepared_frames{ 0 }; +atomic metric_jpeg_displayed_frames{ 0 }; +Summary metric_jpeg_decode_time_seconds; } // namespace -thread JPEGFrameView::jpeg_decoder_thread; mutex cache_mu; map cache; // Under cache_mu. size_t cache_bytes_used = 0; // Under cache_mu. -condition_variable any_pending_decodes; -deque pending_decodes; // Under cache_mu. -atomic event_counter{0}; +atomic event_counter{ 0 }; extern QGLWidget *global_share_widget; extern atomic should_quit; shared_ptr decode_jpeg(const string &jpeg) { + steady_clock::time_point start = steady_clock::now(); shared_ptr frame; if (vaapi_jpeg_decoding_usable) { frame = decode_jpeg_vaapi(jpeg); if (frame != nullptr) { ++metric_jpeg_vaapi_decode_frames; + steady_clock::time_point stop = steady_clock::now(); + metric_jpeg_decode_time_seconds.count_event(duration(stop - start).count()); return frame; } fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n"); @@ -114,24 +105,26 @@ shared_ptr decode_jpeg(const string &jpeg) jpeg_decompress_struct dinfo; JPEGWrapErrorManager error_mgr(&dinfo); - if (!error_mgr.run([&dinfo]{ jpeg_create_decompress(&dinfo); })) { + if (!error_mgr.run([&dinfo] { jpeg_create_decompress(&dinfo); })) { return get_black_frame(); } JPEGDestroyer destroy_dinfo(&dinfo); - if (!error_mgr.run([&dinfo, &jpeg]{ - jpeg_mem_src(&dinfo, reinterpret_cast(jpeg.data()), jpeg.size()); - jpeg_read_header(&dinfo, true); - })) { + if (!error_mgr.run([&dinfo, &jpeg] { + jpeg_mem_src(&dinfo, reinterpret_cast(jpeg.data()), jpeg.size()); + jpeg_read_header(&dinfo, true); + })) { return get_black_frame(); } + jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF); + if (dinfo.num_components != 3) { fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", - dinfo.num_components, - dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, - dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, - dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); + dinfo.num_components, + dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, + dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, + dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); return get_black_frame(); } if (dinfo.comp_info[0].h_samp_factor != dinfo.max_h_samp_factor || @@ -141,16 +134,16 @@ shared_ptr decode_jpeg(const string &jpeg) (dinfo.max_h_samp_factor % dinfo.comp_info[1].h_samp_factor) != 0 || (dinfo.max_v_samp_factor % dinfo.comp_info[1].v_samp_factor) != 0) { // No 2:3 subsampling or other weirdness. fprintf(stderr, "Unsupported subsampling scheme. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n", - dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, - dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, - dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); - exit(1); + dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, + dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, + dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); + abort(); } dinfo.raw_data_out = true; - if (!error_mgr.run([&dinfo]{ - jpeg_start_decompress(&dinfo); - })) { + if (!error_mgr.run([&dinfo] { + jpeg_start_decompress(&dinfo); + })) { return get_black_frame(); } @@ -176,26 +169,36 @@ shared_ptr decode_jpeg(const string &jpeg) frame->pitch_y = luma_width_blocks * DCTSIZE; frame->pitch_chroma = chroma_width_blocks * DCTSIZE; - if (!error_mgr.run([&dinfo, &frame, v_mcu_size, mcu_height_blocks] { - JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size]; - JSAMPARRAY data[3] = { yptr, cbptr, crptr }; - for (unsigned y = 0; y < mcu_height_blocks; ++y) { - // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma. - for (unsigned yy = 0; yy < v_mcu_size; ++yy) { - yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y; - cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; - crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; - } - - jpeg_read_raw_data(&dinfo, data, v_mcu_size); - } + if (dinfo.marker_list != nullptr && + dinfo.marker_list->marker == JPEG_APP0 + 1 && + dinfo.marker_list->data_length >= 4 && + memcmp(dinfo.marker_list->data, "Exif", 4) == 0) { + frame->exif_data.assign(reinterpret_cast(dinfo.marker_list->data), + dinfo.marker_list->data_length); + } - (void)jpeg_finish_decompress(&dinfo); - })) { + if (!error_mgr.run([&dinfo, &frame, v_mcu_size, mcu_height_blocks] { + JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size]; + JSAMPARRAY data[3] = { yptr, cbptr, crptr }; + for (unsigned y = 0; y < mcu_height_blocks; ++y) { + // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma. + for (unsigned yy = 0; yy < v_mcu_size; ++yy) { + yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y; + cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; + crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; + } + + jpeg_read_raw_data(&dinfo, data, v_mcu_size); + } + + (void)jpeg_finish_decompress(&dinfo); + })) { return get_black_frame(); } ++metric_jpeg_software_decode_frames; + steady_clock::time_point stop = steady_clock::now(); + metric_jpeg_decode_time_seconds.count_event(duration(stop - start).count()); return frame; } @@ -203,7 +206,8 @@ void prune_cache() { // Assumes cache_mu is held. int64_t bytes_still_to_remove = cache_bytes_used - (size_t(CACHE_SIZE_MB) * 1024 * 1024) * 9 / 10; - if (bytes_still_to_remove <= 0) return; + if (bytes_still_to_remove <= 0) + return; vector> lru_timestamps_and_size; for (const auto &key_and_value : cache) { @@ -218,10 +222,11 @@ void prune_cache() for (const pair &it : lru_timestamps_and_size) { lru_cutoff_point = it.first; bytes_still_to_remove -= it.second; - if (bytes_still_to_remove <= 0) break; + if (bytes_still_to_remove <= 0) + break; } - for (auto it = cache.begin(); it != cache.end(); ) { + for (auto it = cache.begin(); it != cache.end();) { if (it->second.last_used <= lru_cutoff_point) { cache_bytes_used -= frame_size(*it->second.frame); metric_jpeg_cache_used_bytes = cache_bytes_used; @@ -253,7 +258,7 @@ shared_ptr decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi ++metric_jpeg_cache_miss_frames; *did_decode = true; - shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec)); + shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video); lock_guard lock(cache_mu); cache_bytes_used += frame_size(*frame); @@ -276,7 +281,7 @@ void JPEGFrameView::jpeg_decoder_thread_func() CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE; { unique_lock lock(cache_mu); // TODO: Perhaps under another lock? - any_pending_decodes.wait(lock, [] { + any_pending_decodes.wait(lock, [this] { return !pending_decodes.empty() || should_quit.load(); }); if (should_quit.load()) @@ -284,20 +289,14 @@ void JPEGFrameView::jpeg_decoder_thread_func() decode = pending_decodes.front(); pending_decodes.pop_front(); - size_t num_pending = 0; - for (const PendingDecode &other_decode : pending_decodes) { - if (other_decode.destination == decode.destination) { - ++num_pending; - } - } - if (num_pending > 3) { + if (pending_decodes.size() > 3) { cache_miss_behavior = RETURN_NULLPTR_IF_NOT_IN_CACHE; } } if (decode.frame != nullptr) { // Already decoded, so just show it. - decode.destination->setDecodedFrame(decode.frame, nullptr, 1.0f); + setDecodedFrame(decode.frame, nullptr, 1.0f); continue; } @@ -311,7 +310,7 @@ void JPEGFrameView::jpeg_decoder_thread_func() } bool found_in_cache; - shared_ptr frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &decode.destination->frame_reader, &found_in_cache); + shared_ptr frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &frame_reader, &found_in_cache); if (frame == nullptr) { assert(cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE); @@ -323,7 +322,7 @@ void JPEGFrameView::jpeg_decoder_thread_func() ++num_decoded; if (num_decoded % 1000 == 0) { fprintf(stderr, "Decoded %zu images, dropped %zu (%.2f%% dropped)\n", - num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped)); + num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped)); } } if (subframe_idx == 0) { @@ -338,11 +337,11 @@ void JPEGFrameView::jpeg_decoder_thread_func() } // TODO: Could we get jitter between non-interpolated and interpolated frames here? - decode.destination->setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha); + setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha); } } -void JPEGFrameView::shutdown() +JPEGFrameView::~JPEGFrameView() { any_pending_decodes.notify_all(); jpeg_decoder_thread.join(); @@ -351,16 +350,21 @@ void JPEGFrameView::shutdown() JPEGFrameView::JPEGFrameView(QWidget *parent) : QGLWidget(parent, global_share_widget) { - call_once(jpeg_metrics_inited, []{ + call_once(jpeg_metrics_inited, [] { global_metrics.add("jpeg_cache_used_bytes", &metric_jpeg_cache_used_bytes, Metrics::TYPE_GAUGE); global_metrics.add("jpeg_cache_limit_bytes", &metric_jpeg_cache_limit_bytes, Metrics::TYPE_GAUGE); - global_metrics.add("jpeg_cache_frames", {{ "action", "given_up" }}, &metric_jpeg_cache_given_up_frames); - global_metrics.add("jpeg_cache_frames", {{ "action", "hit" }}, &metric_jpeg_cache_hit_frames); - global_metrics.add("jpeg_cache_frames", {{ "action", "miss" }}, &metric_jpeg_cache_miss_frames); - global_metrics.add("jpeg_decode_frames", {{ "decoder", "software" }, { "result", "decode" }}, &metric_jpeg_software_decode_frames); - global_metrics.add("jpeg_decode_frames", {{ "decoder", "software" }, { "result", "fail" }}, &metric_jpeg_software_fail_frames); - global_metrics.add("jpeg_decode_frames", {{ "decoder", "vaapi" }, { "result", "decode" }}, &metric_jpeg_vaapi_decode_frames); - global_metrics.add("jpeg_decode_frames", {{ "decoder", "vaapi" }, { "result", "fail" }}, &metric_jpeg_vaapi_fail_frames); + global_metrics.add("jpeg_cache_frames", { { "action", "given_up" } }, &metric_jpeg_cache_given_up_frames); + global_metrics.add("jpeg_cache_frames", { { "action", "hit" } }, &metric_jpeg_cache_hit_frames); + global_metrics.add("jpeg_cache_frames", { { "action", "miss" } }, &metric_jpeg_cache_miss_frames); + global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "decode" } }, &metric_jpeg_software_decode_frames); + global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "fail" } }, &metric_jpeg_software_fail_frames); + global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "decode" } }, &metric_jpeg_vaapi_decode_frames); + global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "fail" } }, &metric_jpeg_vaapi_fail_frames); + global_metrics.add("jpeg_frames", { { "action", "prepared" } }, &metric_jpeg_prepared_frames); + global_metrics.add("jpeg_frames", { { "action", "displayed" } }, &metric_jpeg_displayed_frames); + vector quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 }; + metric_jpeg_decode_time_seconds.init(quantiles, 60.0); + global_metrics.add("jpeg_decode_time_seconds", &metric_jpeg_decode_time_seconds); }); } @@ -373,7 +377,6 @@ void JPEGFrameView::setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk decode.primary = frame; decode.secondary = secondary_frame; decode.fade_alpha = fade_alpha; - decode.destination = this; pending_decodes.push_back(decode); any_pending_decodes.notify_all(); } @@ -383,24 +386,18 @@ void JPEGFrameView::setFrame(shared_ptr frame) lock_guard lock(cache_mu); PendingDecode decode; decode.frame = std::move(frame); - decode.destination = this; pending_decodes.push_back(decode); any_pending_decodes.notify_all(); } -ResourcePool *resource_pool = nullptr; - void JPEGFrameView::initializeGL() { glDisable(GL_BLEND); glDisable(GL_DEPTH_TEST); check_error(); - static once_flag once; - call_once(once, [] { - resource_pool = new ResourcePool; - jpeg_decoder_thread = std::thread(jpeg_decoder_thread_func); - }); + resource_pool = new ResourcePool; + jpeg_decoder_thread = std::thread(&JPEGFrameView::jpeg_decoder_thread_func, this); ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_RGBA, resource_pool)); @@ -435,6 +432,11 @@ void JPEGFrameView::paintGL() return; } + if (!displayed_this_frame) { + ++metric_jpeg_displayed_frames; + displayed_this_frame = true; + } + check_error(); current_chain->render_to_screen(); @@ -443,6 +445,7 @@ void JPEGFrameView::paintGL() overlay_input->set_width(overlay_width); overlay_input->set_height(overlay_height); overlay_input->set_pixel_data(overlay_image->bits()); + overlay_input_needs_refresh = false; } glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height); overlay_chain->render_to_screen(); @@ -464,6 +467,8 @@ void JPEGFrameView::setDecodedFrame(shared_ptr frame, shared_ptr s } else { current_chain = ycbcr_converter->prepare_chain_for_conversion(frame); } + ++metric_jpeg_prepared_frames; + displayed_this_frame = false; update(); }); } @@ -482,10 +487,25 @@ void JPEGFrameView::set_overlay(const string &text) return; } + // Figure out how large the texture needs to be. + { + QImage img(overlay_width, overlay_height, QImage::Format_Grayscale8); + QPainter painter(&img); + QFont font = painter.font(); + font.setPointSize(12); + QFontMetrics metrics(font); + overlay_base_width = lrint(metrics.boundingRect(QString::fromStdString(text)).width() + 8.0); + overlay_base_height = lrint(metrics.height()); + } + float dpr = QGuiApplication::primaryScreen()->devicePixelRatio(); overlay_width = lrint(overlay_base_width * dpr); overlay_height = lrint(overlay_base_height * dpr); + // Work around OpenGL alignment issues. + while (overlay_width % 4 != 0) ++overlay_width; + + // Now do the actual drawing. overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8)); overlay_image->setDevicePixelRatio(dpr); overlay_image->fill(0);