From 1a23c2c0194b81c3c73620a26ac93bf62f3e5ee0 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <steinar+nageru@gunderson.no>
Date: Sun, 1 Mar 2020 00:32:15 +0100
Subject: [PATCH] Add summaries for tracking fade and interpolation latency
 across the GPU.

---
 futatabi/video_stream.cpp | 40 +++++++++++++++++++++++++++++++++++++++
 futatabi/video_stream.h   |  1 +
 2 files changed, 41 insertions(+)

diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
index 987bef5..5a36801 100644
--- a/futatabi/video_stream.cpp
+++ b/futatabi/video_stream.cpp
@@ -14,6 +14,7 @@ extern "C" {
 #include "player.h"
 #include "shared/context.h"
 #include "shared/httpd.h"
+#include "shared/metrics.h"
 #include "shared/shared_defs.h"
 #include "shared/mux.h"
 #include "util.h"
@@ -27,6 +28,17 @@ using namespace movit;
 using namespace std;
 using namespace std::chrono;
 
+namespace {
+
+once_flag video_metrics_inited;
+Summary metric_jpeg_encode_time_seconds;
+Summary metric_fade_latency_seconds;
+Summary metric_interpolation_latency_seconds;
+Summary metric_fade_fence_wait_time_seconds;
+Summary metric_interpolation_fence_wait_time_seconds;
+
+}  // namespace
+
 extern HTTPD *global_httpd;
 
 struct VectorDestinationManager {
@@ -83,6 +95,7 @@ static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
 
 string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height, const string exif_data)
 {
+	steady_clock::time_point start = steady_clock::now();
 	VectorDestinationManager dest;
 
 	jpeg_compress_struct cinfo;
@@ -133,12 +146,29 @@
 	jpeg_finish_compress(&cinfo);
 	jpeg_destroy_compress(&cinfo);
 
+	steady_clock::time_point stop = steady_clock::now();
+	metric_jpeg_encode_time_seconds.count_event(duration<double>(stop - start).count());
+
 	return move(dest.dest);
 }
 
 VideoStream::VideoStream(AVFormatContext *file_avctx)
 	: avctx(file_avctx), output_fast_forward(file_avctx != nullptr)
 {
+	call_once(video_metrics_inited, [] {
+		vector<double> quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 };
+		metric_jpeg_encode_time_seconds.init(quantiles, 60.0);
+		global_metrics.add("jpeg_encode_time_seconds", &metric_jpeg_encode_time_seconds);
+		metric_fade_fence_wait_time_seconds.init(quantiles, 60.0);
+		global_metrics.add("fade_fence_wait_time_seconds", &metric_fade_fence_wait_time_seconds);
+		metric_interpolation_fence_wait_time_seconds.init(quantiles, 60.0);
+		global_metrics.add("interpolation_fence_wait_time_seconds", &metric_interpolation_fence_wait_time_seconds);
+		metric_fade_latency_seconds.init(quantiles, 60.0);
+		global_metrics.add("fade_latency_seconds", &metric_fade_latency_seconds);
+		metric_interpolation_latency_seconds.init(quantiles, 60.0);
+		global_metrics.add("interpolation_latency_seconds", &metric_interpolation_latency_seconds);
+	});
+
 	ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
 	ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));
 
@@ -432,6 +462,7 @@ void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64
 	// Set a fence we can wait for to make sure the CPU sees the read.
 	glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
 	check_error();
+	qf.fence_created = steady_clock::now();
 	qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
 	check_error();
 	qf.resources = move(resources);
@@ -572,6 +603,7 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
 	// Set a fence we can wait for to make sure the CPU sees the read.
 	glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
 	check_error();
+	qf.fence_created = steady_clock::now();
 	qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
 	check_error();
 	qf.resources = move(resources);
@@ -713,7 +745,11 @@ void VideoStream::encode_thread_func()
 			add_audio_or_silence(qf);
 		} else if (qf.type == QueuedFrame::FADED) {
+			steady_clock::time_point start = steady_clock::now();
 			glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+			steady_clock::time_point stop = steady_clock::now();
+			metric_fade_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
+			metric_fade_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
 			shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
 			assert(frame->exif_data.empty());
@@ -732,7 +768,11 @@ void VideoStream::encode_thread_func()
 			add_audio_or_silence(qf);
 		} else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
+			steady_clock::time_point start = steady_clock::now();
 			glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+			steady_clock::time_point stop = steady_clock::now();
+			metric_interpolation_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
+			metric_interpolation_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
 			// Send it on to display.
 			shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
diff --git a/futatabi/video_stream.h b/futatabi/video_stream.h
index a6215e9..c83a484 100644
--- a/futatabi/video_stream.h
+++ b/futatabi/video_stream.h
@@ -135,6 +135,7 @@ private:
 		float alpha;
 		BorrowedInterpolatedFrameResources resources;
 		RefCountedGLsync fence;  // Set when the interpolated image is read back to the CPU.
+		std::chrono::steady_clock::time_point fence_created;
 		GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else. flow_tex will typically even be from a previous frame.
 		FrameOnDisk id;
-- 
2.39.2