git.sesse.net Git - nageru/commitdiff
Add summaries for tracking fade and interpolation latency across the GPU.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 29 Feb 2020 23:32:15 +0000 (00:32 +0100)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 29 Feb 2020 23:32:15 +0000 (00:32 +0100)
futatabi/video_stream.cpp
futatabi/video_stream.h

index 987bef542ac45c3b9ba623320b4a298ac96e507c..5a36801b5e6ebf787cc5f84874650d63db2862fb 100644 (file)
@@ -14,6 +14,7 @@ extern "C" {
 #include "player.h"
 #include "shared/context.h"
 #include "shared/httpd.h"
+#include "shared/metrics.h"
 #include "shared/shared_defs.h"
 #include "shared/mux.h"
 #include "util.h"
@@ -27,6 +28,17 @@ using namespace movit;
 using namespace std;
 using namespace std::chrono;
 
+namespace {
+
+once_flag video_metrics_inited;  // Guards the one-time init() and global_metrics registration of the summaries below (done via call_once in the VideoStream constructor).
+Summary metric_jpeg_encode_time_seconds;  // Duration of each encode_jpeg() call in seconds, measured from function entry to just after jpeg_destroy_compress().
+Summary metric_fade_latency_seconds;  // Seconds from qf.fence_created until glClientWaitSync() returns, for FADED frames.
+Summary metric_interpolation_latency_seconds;  // Seconds from qf.fence_created until glClientWaitSync() returns, for INTERPOLATED and FADED_INTERPOLATED frames.
+Summary metric_fade_fence_wait_time_seconds;  // Seconds spent inside glClientWaitSync() itself in encode_thread_func(), for FADED frames.
+Summary metric_interpolation_fence_wait_time_seconds;  // Seconds spent inside glClientWaitSync() itself in encode_thread_func(), for INTERPOLATED and FADED_INTERPOLATED frames.
+
+}  // namespace
+
 extern HTTPD *global_httpd;
 
 struct VectorDestinationManager {
@@ -83,6 +95,7 @@ static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
 
 string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height, const string exif_data)
 {
+       steady_clock::time_point start = steady_clock::now();
        VectorDestinationManager dest;
 
        jpeg_compress_struct cinfo;
@@ -133,12 +146,29 @@ string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t
        jpeg_finish_compress(&cinfo);
        jpeg_destroy_compress(&cinfo);
 
+       steady_clock::time_point stop = steady_clock::now();
+       metric_jpeg_encode_time_seconds.count_event(duration<double>(stop - start).count());
+
        return move(dest.dest);
 }
 
 VideoStream::VideoStream(AVFormatContext *file_avctx)
        : avctx(file_avctx), output_fast_forward(file_avctx != nullptr)
 {
+       call_once(video_metrics_inited, [] {
+               vector<double> quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 };
+               metric_jpeg_encode_time_seconds.init(quantiles, 60.0);
+               global_metrics.add("jpeg_encode_time_seconds", &metric_jpeg_encode_time_seconds);
+               metric_fade_fence_wait_time_seconds.init(quantiles, 60.0);
+               global_metrics.add("fade_fence_wait_time_seconds", &metric_fade_fence_wait_time_seconds);
+               metric_interpolation_fence_wait_time_seconds.init(quantiles, 60.0);
+               global_metrics.add("interpolation_fence_wait_time_seconds", &metric_interpolation_fence_wait_time_seconds);
+               metric_fade_latency_seconds.init(quantiles, 60.0);
+               global_metrics.add("fade_latency_seconds", &metric_fade_latency_seconds);
+               metric_interpolation_latency_seconds.init(quantiles, 60.0);
+               global_metrics.add("interpolation_latency_seconds", &metric_interpolation_latency_seconds);
+       });
+
        ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
        ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));
 
@@ -432,6 +462,7 @@ void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64
        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
+       qf.fence_created = steady_clock::now();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);
@@ -572,6 +603,7 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
+       qf.fence_created = steady_clock::now();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);
@@ -713,7 +745,11 @@ void VideoStream::encode_thread_func()
 
                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::FADED) {
+                       steady_clock::time_point start = steady_clock::now();
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+                       steady_clock::time_point stop = steady_clock::now();
+                       metric_fade_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
+                       metric_fade_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
                        shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
                        assert(frame->exif_data.empty());
@@ -732,7 +768,11 @@ void VideoStream::encode_thread_func()
 
                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
+                       steady_clock::time_point start = steady_clock::now();
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+                       steady_clock::time_point stop = steady_clock::now();
+                       metric_interpolation_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
+                       metric_interpolation_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());
 
                        // Send it on to display.
                        shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
index a6215e963e384829c5ca8c2c8753dffb6d28fa57..c83a4849d07286cb3719bc906d6c96aae529651c 100644 (file)
@@ -135,6 +135,7 @@ private:
                float alpha;
                BorrowedInterpolatedFrameResources resources;
                RefCountedGLsync fence;  // Set when the interpolated image is read back to the CPU.
+               std::chrono::steady_clock::time_point fence_created;
                GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else. flow_tex will typically even be from a previous frame.
                FrameOnDisk id;