From 1df7849fad1d0647a02951abaa60f9e4f40ce360 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 11 Jun 2017 20:07:00 +0200 Subject: [PATCH] Add exported metrics for all the latency measurements. --- decklink_output.cpp | 3 ++- decklink_output.h | 2 ++ metrics.cpp | 10 ++++++++++ metrics.h | 1 + print_latency.cpp | 41 +++++++++++++++++++++++++++++++++++----- print_latency.h | 9 ++++++++- quicksync_encoder.cpp | 7 +++++-- quicksync_encoder_impl.h | 1 + x264_encoder.cpp | 4 +++- x264_encoder.h | 1 + 10 files changed, 69 insertions(+), 10 deletions(-) diff --git a/decklink_output.cpp b/decklink_output.cpp index f2ac40b..d846b96 100644 --- a/decklink_output.cpp +++ b/decklink_output.cpp @@ -22,6 +22,7 @@ DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, u : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index) { chroma_subsampler.reset(new ChromaSubsampler(resource_pool)); + latency_histogram.init("decklink_output"); } void DeckLinkOutput::set_device(IDeckLink *decklink) @@ -395,7 +396,7 @@ HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *co } static int frameno = 0; - print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno); + print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram); { lock_guard lock(frame_queue_mutex); diff --git a/decklink_output.h b/decklink_output.h index a295496..ba2318a 100644 --- a/decklink_output.h +++ b/decklink_output.h @@ -150,6 +150,8 @@ private: GLuint uyvy_vbo; // Holds position and texcoord data. GLuint uyvy_program_num; // Owned by . GLuint uyvy_position_attribute_index, uyvy_texcoord_attribute_index; + + LatencyHistogram latency_histogram; }; #endif // !defined(_DECKLINK_OUTPUT_H) diff --git a/metrics.cpp b/metrics.cpp index 3468ed7..1f3f8a1 100644 --- a/metrics.cpp +++ b/metrics.cpp @@ -1,6 +1,7 @@ #include "metrics.h" #include +#include #include #include @@ -119,6 +120,15 @@ void Histogram::init_uniform(size_t num_buckets) } } +void Histogram::init_geometric(double min, double max, size_t num_buckets) +{ + this->num_buckets = num_buckets; + buckets.reset(new Bucket[num_buckets]); + for (size_t i = 0; i < num_buckets; ++i) { + buckets[i].val = min * pow(max / min, double(i) / (num_buckets - 1)); + } +} + void Histogram::count_event(double val) { Bucket ref_bucket; diff --git a/metrics.h b/metrics.h index a570723..a815658 100644 --- a/metrics.h +++ b/metrics.h @@ -72,6 +72,7 @@ class Histogram { public: void init(const std::vector &bucket_vals); void init_uniform(size_t num_buckets); // Sets up buckets 0..(N-1). + void init_geometric(double min, double max, size_t num_buckets); void count_event(double val); std::string serialize(const std::string &name, const std::vector> &labels) const; diff --git a/print_latency.cpp b/print_latency.cpp index ee9470c..a80ac4f 100644 --- a/print_latency.cpp +++ b/print_latency.cpp @@ -1,6 +1,7 @@ #include "print_latency.h" #include "flags.h" +#include "metrics.h" #include #include @@ -22,18 +23,48 @@ ReceivedTimestamps find_received_timestamp(const vector &input_ return { min_ts, max_ts }; } -void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno) +void LatencyHistogram::init(const string &measuring_point) +{ + histogram_lowest_latency_input.init_geometric(0.001, 10.0, 30); + histogram_highest_latency_input.init_geometric(0.001, 10.0, 30); + histogram_lowest_latency_input_bframe.init_geometric(0.001, 10.0, 30); + histogram_highest_latency_input_bframe.init_geometric(0.001, 10.0, 30); + + global_metrics.add("latency_seconds", + {{ "measuring_point", measuring_point }, { "input", "lowest_latency" }, { "frame_type", "i/p" }}, + &histogram_lowest_latency_input); + global_metrics.add("latency_seconds", + {{ "measuring_point", measuring_point }, { "input", "highest_latency" }, { "frame_type", "i/p" }}, + &histogram_highest_latency_input); + global_metrics.add("latency_seconds", + {{ "measuring_point", measuring_point }, { "input", "lowest_latency" }, { "frame_type", "b" }}, + &histogram_lowest_latency_input_bframe); + global_metrics.add("latency_seconds", + {{ "measuring_point", measuring_point }, { "input", "highest_latency" }, { "frame_type", "b" }}, + &histogram_highest_latency_input_bframe); +} + +void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno, LatencyHistogram *histogram) { if (received_ts.max_ts == steady_clock::time_point::min()) return; + const steady_clock::time_point now = steady_clock::now(); + duration lowest_latency = now - received_ts.max_ts; + duration highest_latency = now - received_ts.min_ts; + + if (is_b_frame) { + histogram->histogram_lowest_latency_input_bframe.count_event(lowest_latency.count()); + histogram->histogram_highest_latency_input_bframe.count_event(highest_latency.count()); + } else { + histogram->histogram_lowest_latency_input.count_event(lowest_latency.count()); + histogram->histogram_highest_latency_input.count_event(highest_latency.count()); + } + // 101 is chosen so that it's prime, which is unlikely to get the same frame type every time. if (global_flags.print_video_latency && (++*frameno % 101) == 0) { - const steady_clock::time_point now = steady_clock::now(); printf("%-60s %4.0f ms (lowest-latency input), %4.0f ms (highest-latency input)", - header.c_str(), - 1e3 * std::chrono::duration(now - received_ts.max_ts).count(), - 1e3 * std::chrono::duration(now - received_ts.min_ts).count()); + header.c_str(), 1e3 * lowest_latency.count(), 1e3 * highest_latency.count()); if (is_b_frame) { printf(" [on B-frame; potential extra latency]\n"); diff --git a/print_latency.h b/print_latency.h index 0e880ca..1c937c5 100644 --- a/print_latency.h +++ b/print_latency.h @@ -10,6 +10,7 @@ #include #include "ref_counted_frame.h" +#include "metrics.h" // Since every output frame is based on multiple input frames, we need // more than one start timestamp. For now, we keep just the smallest @@ -18,9 +19,15 @@ struct ReceivedTimestamps { std::chrono::steady_clock::time_point min_ts, max_ts; }; +struct LatencyHistogram { + void init(const std::string &measuring_point); // Initializes histograms and registers them in global_metrics. + + Histogram histogram_lowest_latency_input, histogram_highest_latency_input; + Histogram histogram_lowest_latency_input_bframe, histogram_highest_latency_input_bframe; +}; ReceivedTimestamps find_received_timestamp(const std::vector &input_frames); -void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno); +void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno, LatencyHistogram *histogram); #endif // !defined(_PRINT_LATENCY_H) diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 8cf8cf3..1821f5b 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -1414,7 +1414,7 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task) static int frameno = 0; print_latency("Current Quick Sync latency (video inputs → disk mux):", - task.received_ts, (task.frame_type == FRAME_B), &frameno); + task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram); { // Add video. @@ -1561,6 +1561,9 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource memset(&slice_param, 0, sizeof(slice_param)); } + mixer_latency_histogram.init("mixer"); + qs_latency_histogram.init("quick_sync"); + storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this); encode_thread = thread([this]{ @@ -1962,7 +1965,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); static int frameno = 0; print_latency("Current mixer latency (video inputs → ready for encode):", - received_ts, false, &frameno); + received_ts, false, &frameno, &mixer_latency_histogram); // Release back any input frames we needed to render this frame. frame.input_frames.clear(); diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h index 917420c..f8243ff 100644 --- a/quicksync_encoder_impl.h +++ b/quicksync_encoder_impl.h @@ -222,6 +222,7 @@ private: int frame_height_mbaligned; DiskSpaceEstimator *disk_space_estimator; + LatencyHistogram mixer_latency_histogram, qs_latency_histogram; }; #endif // !defined(_QUICKSYNC_ENCODER_IMPL_H) diff --git a/x264_encoder.cpp b/x264_encoder.cpp index 08f6c91..955491e 100644 --- a/x264_encoder.cpp +++ b/x264_encoder.cpp @@ -68,6 +68,8 @@ X264Encoder::X264Encoder(AVOutputFormat *oformat) metric_x264_crf.init_uniform(50); global_metrics.add("x264_crf", &metric_x264_crf); + + latency_histogram.init("x264"); } X264Encoder::~X264Encoder() @@ -375,7 +377,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) static int frameno = 0; print_latency("Current x264 latency (video inputs → network mux):", received_ts, (pic.i_type == X264_TYPE_B || pic.i_type == X264_TYPE_BREF), - &frameno); + &frameno, &latency_histogram); } else { assert(false); } diff --git a/x264_encoder.h b/x264_encoder.h index b8a3562..2f9c81c 100644 --- a/x264_encoder.h +++ b/x264_encoder.h @@ -124,6 +124,7 @@ private: std::atomic metric_x264_output_frames_p{0}; std::atomic metric_x264_output_frames_b{0}; Histogram metric_x264_crf; + LatencyHistogram latency_histogram; }; #endif // !defined(_X264ENCODE_H) -- 2.39.2