X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=futatabi%2Fjpeg_frame_view.cpp;h=85c708d4d20f9aff8f137d0d3573b489d9a368f5;hb=refs%2Fheads%2Fmaster;hp=d35964ce4c0709861d68b5cbfdcee18e2c9e1643;hpb=36ae902913f91a6e4d3d6a1f5d16a0ab1b92c3ae;p=nageru diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp index d35964c..6dafb3e 100644 --- a/futatabi/jpeg_frame_view.cpp +++ b/futatabi/jpeg_frame_view.cpp @@ -4,6 +4,8 @@ #include "flags.h" #include "jpeg_destroyer.h" #include "jpeglib_error_wrapper.h" +#include "pbo_pool.h" +#include "shared/context.h" #include "shared/metrics.h" #include "shared/post_to_main_thread.h" #include "video_stream.h" @@ -12,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +32,7 @@ using namespace movit; using namespace std; +using namespace std::chrono; namespace { @@ -70,6 +74,9 @@ atomic metric_jpeg_software_decode_frames{ 0 }; atomic metric_jpeg_software_fail_frames{ 0 }; atomic metric_jpeg_vaapi_decode_frames{ 0 }; atomic metric_jpeg_vaapi_fail_frames{ 0 }; +atomic metric_jpeg_prepared_frames{ 0 }; +atomic metric_jpeg_displayed_frames{ 0 }; +Summary metric_jpeg_decode_time_seconds; } // namespace @@ -82,11 +89,14 @@ extern atomic should_quit; shared_ptr decode_jpeg(const string &jpeg) { + steady_clock::time_point start = steady_clock::now(); shared_ptr frame; if (vaapi_jpeg_decoding_usable) { frame = decode_jpeg_vaapi(jpeg); if (frame != nullptr) { ++metric_jpeg_vaapi_decode_frames; + steady_clock::time_point stop = steady_clock::now(); + metric_jpeg_decode_time_seconds.count_event(duration(stop - start).count()); return frame; } fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n"); @@ -102,6 +112,8 @@ shared_ptr decode_jpeg(const string &jpeg) } JPEGDestroyer destroy_dinfo(&dinfo); + jpeg_save_markers(&dinfo, JPEG_APP0 + 1, 0xFFFF); + if (!error_mgr.run([&dinfo, &jpeg] { jpeg_mem_src(&dinfo, reinterpret_cast(jpeg.data()), jpeg.size()); jpeg_read_header(&dinfo, true); @@ -127,7 +139,7 @@ shared_ptr decode_jpeg(const string &jpeg) dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor, dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor, dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor); - exit(1); + abort(); } dinfo.raw_data_out = true; @@ -149,25 +161,37 @@ shared_ptr decode_jpeg(const string &jpeg) unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor; unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor; - unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor; - unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor; - - // TODO: Decode into a PBO. - frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]); - frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]); - frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]); - frame->pitch_y = luma_width_blocks * DCTSIZE; - frame->pitch_chroma = chroma_width_blocks * DCTSIZE; - - if (!error_mgr.run([&dinfo, &frame, v_mcu_size, mcu_height_blocks] { - JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size]; - JSAMPARRAY data[3] = { yptr, cbptr, crptr }; + + PBO pbo = global_pbo_pool->alloc_pbo(); + const size_t chroma_width = dinfo.image_width / frame->chroma_subsampling_x; + const size_t chroma_height = dinfo.image_height / frame->chroma_subsampling_y; + size_t cb_offset = dinfo.image_width * dinfo.image_height; + size_t cr_offset = cb_offset + chroma_width * chroma_height; + uint8_t *y_pix = pbo.ptr; + uint8_t *cb_pix = pbo.ptr + cb_offset; + uint8_t *cr_pix = pbo.ptr + cr_offset; + unsigned pitch_y = luma_width_blocks * DCTSIZE; + unsigned pitch_chroma = chroma_width_blocks * DCTSIZE; + + if (dinfo.marker_list != nullptr && + dinfo.marker_list->marker == JPEG_APP0 + 1 && + dinfo.marker_list->data_length >= 4 && + memcmp(dinfo.marker_list->data, "Exif", 4) == 0) { + frame->exif_data.assign(reinterpret_cast(dinfo.marker_list->data), + dinfo.marker_list->data_length); + } + + if (!error_mgr.run([&dinfo, &y_pix, &cb_pix, &cr_pix, pitch_y, pitch_chroma, v_mcu_size, mcu_height_blocks] { + unique_ptr yptr(new JSAMPROW[v_mcu_size]); + unique_ptr cbptr(new JSAMPROW[v_mcu_size]); + unique_ptr crptr(new JSAMPROW[v_mcu_size]); + JSAMPARRAY data[3] = { yptr.get(), cbptr.get(), crptr.get() }; for (unsigned y = 0; y < mcu_height_blocks; ++y) { // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma. for (unsigned yy = 0; yy < v_mcu_size; ++yy) { - yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y; - cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; - crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma; + yptr[yy] = y_pix + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * pitch_y; + cbptr[yy] = cb_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma; + crptr[yy] = cr_pix + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * pitch_chroma; } jpeg_read_raw_data(&dinfo, data, v_mcu_size); @@ -178,7 +202,24 @@ shared_ptr decode_jpeg(const string &jpeg) return get_black_frame(); } + // FIXME: what about resolutions that are not divisible by the block factor? + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.pbo); + frame->y = create_texture_2d(frame->width, frame->height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0)); + frame->cb = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cb_offset)); + frame->cr = create_texture_2d(chroma_width, chroma_height, GL_R8, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(cr_offset)); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glFlushMappedNamedBufferRange(pbo.pbo, 0, dinfo.image_width * dinfo.image_height + chroma_width * chroma_height * 2); + glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); + pbo.upload_done = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); + glFlush(); + frame->uploaded_ui_thread = pbo.upload_done; + frame->uploaded_interpolation = pbo.upload_done; + global_pbo_pool->release_pbo(move(pbo)); + ++metric_jpeg_software_decode_frames; + steady_clock::time_point stop = steady_clock::now(); + metric_jpeg_decode_time_seconds.count_event(duration(stop - start).count()); return frame; } @@ -238,7 +279,7 @@ shared_ptr decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi ++metric_jpeg_cache_miss_frames; *did_decode = true; - shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec)); + shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video); lock_guard lock(cache_mu); cache_bytes_used += frame_size(*frame); @@ -256,6 +297,13 @@ void JPEGFrameView::jpeg_decoder_thread_func() size_t num_decoded = 0, num_dropped = 0; pthread_setname_np(pthread_self(), "JPEGDecoder"); + QSurface *surface = create_surface(); + QOpenGLContext *context = create_context(surface); + bool ok = make_current(context, surface); + if (!ok) { + fprintf(stderr, "Video stream couldn't get an OpenGL context\n"); + abort(); + } while (!should_quit.load()) { PendingDecode decode; CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE; @@ -340,6 +388,11 @@ JPEGFrameView::JPEGFrameView(QWidget *parent) global_metrics.add("jpeg_decode_frames", { { "decoder", "software" }, { "result", "fail" } }, &metric_jpeg_software_fail_frames); global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "decode" } }, &metric_jpeg_vaapi_decode_frames); global_metrics.add("jpeg_decode_frames", { { "decoder", "vaapi" }, { "result", "fail" } }, &metric_jpeg_vaapi_fail_frames); + global_metrics.add("jpeg_frames", { { "action", "prepared" } }, &metric_jpeg_prepared_frames); + global_metrics.add("jpeg_frames", { { "action", "displayed" } }, &metric_jpeg_displayed_frames); + vector quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 }; + metric_jpeg_decode_time_seconds.init(quantiles, 60.0); + global_metrics.add("jpeg_decode_time_seconds", &metric_jpeg_decode_time_seconds); }); } @@ -361,12 +414,15 @@ void JPEGFrameView::setFrame(shared_ptr frame) lock_guard lock(cache_mu); PendingDecode decode; decode.frame = std::move(frame); + decode.fade_alpha = 0.0f; pending_decodes.push_back(decode); any_pending_decodes.notify_all(); } void JPEGFrameView::initializeGL() { + init_pbo_pool(); + glDisable(GL_BLEND); glDisable(GL_DEPTH_TEST); check_error(); @@ -407,6 +463,19 @@ void JPEGFrameView::paintGL() return; } + if (!displayed_this_frame) { + ++metric_jpeg_displayed_frames; + displayed_this_frame = true; + } + if (current_frame->uploaded_ui_thread != nullptr) { + glWaitSync(current_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED); + current_frame->uploaded_ui_thread.reset(); + } + if (current_secondary_frame != nullptr && current_secondary_frame->uploaded_ui_thread != nullptr) { + glWaitSync(current_secondary_frame->uploaded_ui_thread.get(), /*flags=*/0, GL_TIMEOUT_IGNORED); + current_secondary_frame->uploaded_ui_thread.reset(); + } + check_error(); current_chain->render_to_screen(); @@ -415,6 +484,7 @@ void JPEGFrameView::paintGL() overlay_input->set_width(overlay_width); overlay_input->set_height(overlay_height); overlay_input->set_pixel_data(overlay_image->bits()); + overlay_input_needs_refresh = false; } glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height); overlay_chain->render_to_screen(); @@ -436,6 +506,8 @@ void JPEGFrameView::setDecodedFrame(shared_ptr frame, shared_ptr s } else { current_chain = ycbcr_converter->prepare_chain_for_conversion(frame); } + ++metric_jpeg_prepared_frames; + displayed_this_frame = false; update(); }); } @@ -454,10 +526,25 @@ void JPEGFrameView::set_overlay(const string &text) return; } + // Figure out how large the texture needs to be. + { + QImage img(overlay_width, overlay_height, QImage::Format_Grayscale8); + QPainter painter(&img); + QFont font = painter.font(); + font.setPointSize(12); + QFontMetrics metrics(font); + overlay_base_width = lrint(metrics.boundingRect(QString::fromStdString(text)).width() + 8.0); + overlay_base_height = lrint(metrics.height()); + } + float dpr = QGuiApplication::primaryScreen()->devicePixelRatio(); overlay_width = lrint(overlay_base_width * dpr); overlay_height = lrint(overlay_base_height * dpr); + // Work around OpenGL alignment issues. + while (overlay_width % 4 != 0) ++overlay_width; + + // Now do the actual drawing. overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8)); overlay_image->setDevicePixelRatio(dpr); overlay_image->fill(0); @@ -479,16 +566,17 @@ shared_ptr get_black_frame() static shared_ptr black_frame; static once_flag flag; call_once(flag, [] { - black_frame.reset(new Frame); - black_frame->y.reset(new uint8_t[global_flags.width * global_flags.height]); - black_frame->cb.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]); - black_frame->cr.reset(new uint8_t[(global_flags.width / 2) * (global_flags.height / 2)]); - black_frame->width = global_flags.width; - black_frame->height = global_flags.height; - black_frame->chroma_subsampling_x = 2; - black_frame->chroma_subsampling_y = 2; - black_frame->pitch_y = global_flags.width; - black_frame->pitch_chroma = global_flags.width / 2; + // Not really black, but whatever. :-) + uint8_t black[] = { 0, 0, 0, 255 }; + RefCountedTexture black_tex = create_texture_2d(1, 1, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, black); + + black_frame->y = black_tex; + black_frame->cb = black_tex; + black_frame->cr = move(black_tex); + black_frame->width = 1; + black_frame->height = 1; + black_frame->chroma_subsampling_x = 1; + black_frame->chroma_subsampling_y = 1; }); ++metric_jpeg_software_fail_frames; return black_frame;