From 1df7849fad1d0647a02951abaa60f9e4f40ce360 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Sun, 11 Jun 2017 20:07:00 +0200
Subject: [PATCH] Add exported metrics for all the latency measurements.

---
 decklink_output.cpp      |  3 ++-
 decklink_output.h        |  2 ++
 metrics.cpp              | 10 ++++++++++
 metrics.h                |  1 +
 print_latency.cpp        | 41 +++++++++++++++++++++++++++++++++++-----
 print_latency.h          |  9 ++++++++-
 quicksync_encoder.cpp    |  7 +++++--
 quicksync_encoder_impl.h |  1 +
 x264_encoder.cpp         |  4 +++-
 x264_encoder.h           |  1 +
 10 files changed, 69 insertions(+), 10 deletions(-)
diff --git a/decklink_output.cpp b/decklink_output.cpp
index f2ac40b..d846b96 100644
--- a/decklink_output.cpp
+++ b/decklink_output.cpp
@@ -22,6 +22,7 @@ DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, u
 	: resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
 {
 	chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
+	latency_histogram.init("decklink_output");
 }
 
 void DeckLinkOutput::set_device(IDeckLink *decklink)
@@ -395,7 +396,7 @@ HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *co
 	}
 
 	static int frameno = 0;
-	print_latency("DeckLink output latency (frame received â output on HDMI):", frame->received_ts, false, &frameno);
+	print_latency("DeckLink output latency (frame received â output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram);
 
 	{
 		lock_guard<mutex> lock(frame_queue_mutex);
diff --git a/decklink_output.h b/decklink_output.h
index a295496..ba2318a 100644
--- a/decklink_output.h
+++ b/decklink_output.h
@@ -150,6 +150,8 @@ private:
 	GLuint uyvy_vbo;  // Holds position and texcoord data.
 	GLuint uyvy_program_num;  // Owned by <resource_pool>.
 	GLuint uyvy_position_attribute_index, uyvy_texcoord_attribute_index;
+
+	LatencyHistogram latency_histogram;
 };
 
 #endif  // !defined(_DECKLINK_OUTPUT_H)
diff --git a/metrics.cpp b/metrics.cpp
index 3468ed7..1f3f8a1 100644
--- a/metrics.cpp
+++ b/metrics.cpp
@@ -1,6 +1,7 @@
 #include "metrics.h"
 
 #include <assert.h>
+#include <math.h>
 
 #include <algorithm>
 #include <locale>
@@ -119,6 +120,15 @@ void Histogram::init_uniform(size_t num_buckets)
 	}
 }
 
+void Histogram::init_geometric(double min, double max, size_t num_buckets)
+{
+	this->num_buckets = num_buckets;
+	buckets.reset(new Bucket[num_buckets]);
+	for (size_t i = 0; i < num_buckets; ++i) {
+		buckets[i].val = min * pow(max / min, double(i) / (num_buckets - 1));
+	}
+}
+
 void Histogram::count_event(double val)
 {
 	Bucket ref_bucket;
diff --git a/metrics.h b/metrics.h
index a570723..a815658 100644
--- a/metrics.h
+++ b/metrics.h
@@ -72,6 +72,7 @@ class Histogram {
 public:
 	void init(const std::vector<double> &bucket_vals);
 	void init_uniform(size_t num_buckets);  // Sets up buckets 0..(N-1).
+	void init_geometric(double min, double max, size_t num_buckets);
 	void count_event(double val);
 	std::string serialize(const std::string &name, const std::vector<std::pair<std::string, std::string>> &labels) const;
 
diff --git a/print_latency.cpp b/print_latency.cpp
index ee9470c..a80ac4f 100644
--- a/print_latency.cpp
+++ b/print_latency.cpp
@@ -1,6 +1,7 @@
 #include "print_latency.h"
 
 #include "flags.h"
+#include "metrics.h"
 
 #include <stdio.h>
 #include <chrono>
@@ -22,18 +23,48 @@ ReceivedTimestamps find_received_timestamp(const vector<RefCountedFrame> &input_
 	return { min_ts, max_ts };
 }
 
-void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno)
+void LatencyHistogram::init(const string &measuring_point)
+{
+	histogram_lowest_latency_input.init_geometric(0.001, 10.0, 30);
+	histogram_highest_latency_input.init_geometric(0.001, 10.0, 30);
+	histogram_lowest_latency_input_bframe.init_geometric(0.001, 10.0, 30);
+	histogram_highest_latency_input_bframe.init_geometric(0.001, 10.0, 30);
+
+	global_metrics.add("latency_seconds",
+		{{ "measuring_point", measuring_point }, { "input", "lowest_latency" }, { "frame_type", "i/p" }},
+		&histogram_lowest_latency_input);
+	global_metrics.add("latency_seconds",
+		{{ "measuring_point", measuring_point }, { "input", "highest_latency" }, { "frame_type", "i/p" }},
+		&histogram_highest_latency_input);
+	global_metrics.add("latency_seconds",
+		{{ "measuring_point", measuring_point }, { "input", "lowest_latency" }, { "frame_type", "b" }},
+		&histogram_lowest_latency_input_bframe);
+	global_metrics.add("latency_seconds",
+		{{ "measuring_point", measuring_point }, { "input", "highest_latency" }, { "frame_type", "b" }},
+		&histogram_highest_latency_input_bframe);
+}
+
+void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno, LatencyHistogram *histogram)
 {
 	if (received_ts.max_ts == steady_clock::time_point::min())
 		return;
 
+	const steady_clock::time_point now = steady_clock::now();
+	duration<double> lowest_latency = now - received_ts.max_ts;
+	duration<double> highest_latency = now - received_ts.min_ts;
+
+	if (is_b_frame) {
+		histogram->histogram_lowest_latency_input_bframe.count_event(lowest_latency.count());
+		histogram->histogram_highest_latency_input_bframe.count_event(highest_latency.count());
+	} else {
+		histogram->histogram_lowest_latency_input.count_event(lowest_latency.count());
+		histogram->histogram_highest_latency_input.count_event(highest_latency.count());
+	}
+
 	// 101 is chosen so that it's prime, which is unlikely to get the same frame type every time.
 	if (global_flags.print_video_latency && (++*frameno % 101) == 0) {
-		const steady_clock::time_point now = steady_clock::now();
 		printf("%-60s %4.0f ms (lowest-latency input), %4.0f ms (highest-latency input)",
-			header.c_str(),
-			1e3 * std::chrono::duration<double>(now - received_ts.max_ts).count(),
-			1e3 * std::chrono::duration<double>(now - received_ts.min_ts).count());
+			header.c_str(), 1e3 * lowest_latency.count(), 1e3 * highest_latency.count());
 
 		if (is_b_frame) {
 			printf("  [on B-frame; potential extra latency]\n");
diff --git a/print_latency.h b/print_latency.h
index 0e880ca..1c937c5 100644
--- a/print_latency.h
+++ b/print_latency.h
@@ -10,6 +10,7 @@
 #include <vector>
 
 #include "ref_counted_frame.h"
+#include "metrics.h"
 
 // Since every output frame is based on multiple input frames, we need
 // more than one start timestamp. For now, we keep just the smallest
@@ -18,9 +19,15 @@
 struct ReceivedTimestamps {
 	std::chrono::steady_clock::time_point min_ts, max_ts;
 };
+struct LatencyHistogram {
+	void init(const std::string &measuring_point);  // Initializes histograms and registers them in global_metrics.
+
+	Histogram histogram_lowest_latency_input, histogram_highest_latency_input;
+	Histogram histogram_lowest_latency_input_bframe, histogram_highest_latency_input_bframe;
+};
 
 ReceivedTimestamps find_received_timestamp(const std::vector<RefCountedFrame> &input_frames);
 
-void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno);
+void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno, LatencyHistogram *histogram);
 
 #endif  // !defined(_PRINT_LATENCY_H)
diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 8cf8cf3..1821f5b 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -1414,7 +1414,7 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task)
 
 	static int frameno = 0;
 	print_latency("Current Quick Sync latency (video inputs â disk mux):",
-		task.received_ts, (task.frame_type == FRAME_B), &frameno);
+		task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram);
 
 	{
 		// Add video.
@@ -1561,6 +1561,9 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
 		memset(&slice_param, 0, sizeof(slice_param));
 	}
 
+	mixer_latency_histogram.init("mixer");
+	qs_latency_histogram.init("quick_sync");
+
 	storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
 	encode_thread = thread([this]{
@@ -1962,7 +1965,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
 	static int frameno = 0;
 	print_latency("Current mixer latency (video inputs â ready for encode):",
-		received_ts, false, &frameno);
+		received_ts, false, &frameno, &mixer_latency_histogram);
 
 	// Release back any input frames we needed to render this frame.
 	frame.input_frames.clear();
diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h
index 917420c..f8243ff 100644
--- a/quicksync_encoder_impl.h
+++ b/quicksync_encoder_impl.h
@@ -222,6 +222,7 @@ private:
 	int frame_height_mbaligned;
 
 	DiskSpaceEstimator *disk_space_estimator;
+	LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
 };
 
 #endif  // !defined(_QUICKSYNC_ENCODER_IMPL_H)
diff --git a/x264_encoder.cpp b/x264_encoder.cpp
index 08f6c91..955491e 100644
--- a/x264_encoder.cpp
+++ b/x264_encoder.cpp
@@ -68,6 +68,8 @@ X264Encoder::X264Encoder(AVOutputFormat *oformat)
 
 	metric_x264_crf.init_uniform(50);
 	global_metrics.add("x264_crf", &metric_x264_crf);
+
+	latency_histogram.init("x264");
 }
 
 X264Encoder::~X264Encoder()
@@ -375,7 +377,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 		static int frameno = 0;
 		print_latency("Current x264 latency (video inputs â network mux):",
 			received_ts, (pic.i_type == X264_TYPE_B || pic.i_type == X264_TYPE_BREF),
-			&frameno);
+			&frameno, &latency_histogram);
 	} else {
 		assert(false);
 	}
diff --git a/x264_encoder.h b/x264_encoder.h
index b8a3562..2f9c81c 100644
--- a/x264_encoder.h
+++ b/x264_encoder.h
@@ -124,6 +124,7 @@ private:
 	std::atomic<int64_t> metric_x264_output_frames_p{0};
 	std::atomic<int64_t> metric_x264_output_frames_b{0};
 	Histogram metric_x264_crf;
+	LatencyHistogram latency_histogram;
 };
 
 #endif  // !defined(_X264ENCODE_H)
-- 
2.39.2