git.sesse.net Git - nageru/commitdiff
Add a switch to print video latency.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 10 Dec 2016 14:58:35 +0000 (15:58 +0100)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 10 Dec 2016 14:58:35 +0000 (15:58 +0100)
This ignores mux latency and audio latency, but it's a very good start
for ongoing low-latency debugging.

Requires bmusb 0.5.3.

13 files changed:
Makefile
README
bmusb
decklink_capture.cpp
flags.cpp
flags.h
mixer.cpp
mixer.h
print_latency.cpp [new file with mode: 0644]
print_latency.h [new file with mode: 0644]
quicksync_encoder.cpp
x264_encoder.cpp
x264_encoder.h

index 6b7925402c2c785fbf6c07f6dbc1a40bd57748f8..0380d0d53076531fe6322c6129c47bb14bd3f4da 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ OBJS += midi_mapper.o midi_mapping.pb.o
 
 # Mixer objects
 AUDIO_MIXER_OBJS = audio_mixer.o alsa_input.o alsa_pool.o ebu_r128_proc.o stereocompressor.o resampling_queue.o flags.o correlation_measurer.o filter.o input_mapping.o state.pb.o
-OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o $(AUDIO_MIXER_OBJS)
+OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o print_latency.o $(AUDIO_MIXER_OBJS)
 
 # Streaming and encoding objects
 OBJS += quicksync_encoder.o x264_encoder.o x264_speed_control.o video_encoder.o metacube2.o mux.o audio_encoder.o ffmpeg_raii.o
diff --git a/README b/README
index 664ba9d58dcc9f28866f7026b858bf6130223f83..c1862af77d2876166614b137bfda0c7463efc2f4 100644 (file)
--- a/README
+++ b/README
@@ -76,10 +76,9 @@ with:
     libmovit-dev libegl1-mesa-dev libasound2-dev libx264-dev libbmusb-dev \
     protobuf-compiler libprotobuf-dev libpci-dev
 
-Exceptions as of November 2016:
+Exceptions as of December 2016:
 
-  - libusb 1.0.21 is not yet in stretch; you need to fetch it from sid.
-    Same with a bmusb version built against it.
+  - bmusb 0.5.3 is not yet in sid.
 
 
 The patches/ directory contains a patch that helps zita-resampler performance.
diff --git a/bmusb b/bmusb
index a765e066b74ac52ff0abf239d430d6f8d83f792e..227166d4398263e1191bdfe244f62bc38aaf4c1f 160000 (submodule)
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit a765e066b74ac52ff0abf239d430d6f8d83f792e
+Subproject commit 227166d4398263e1191bdfe244f62bc38aaf4c1f
index 505083ff056a72018069ec2e3ae9e2573e5ab51a..5e41da436ef71b0920495e38137390661f4b09fb 100644 (file)
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <chrono>
 #include <cstdint>
 #include <utility>
 #include <vector>
@@ -21,6 +22,7 @@
 #define FRAME_SIZE (8 << 20)  // 8 MB.
 
 using namespace std;
+using namespace std::chrono;
 using namespace std::placeholders;
 using namespace bmusb;
 
@@ -353,6 +355,8 @@ HRESULT STDMETHODCALLTYPE DeckLinkCapture::VideoInputFrameArrived(
 
                        video_format.width = width;
                        video_format.height = height;
+
+                       current_video_frame.received_timestamp = steady_clock::now();
                }
        }
 
@@ -369,6 +373,8 @@ HRESULT STDMETHODCALLTYPE DeckLinkCapture::VideoInputFrameArrived(
 
                        audio_format.bits_per_sample = 32;
                        audio_format.num_channels = 2;
+
+                       current_audio_frame.received_timestamp = steady_clock::now();
                }
        }
 
index 279ea426e92d0644a4a1eb82cd00edd1fc6df3e9..239a688e428e4871296c8ed9644e8ddda817ffa7 100644 (file)
--- a/flags.cpp
+++ b/flags.cpp
@@ -44,7 +44,8 @@ enum LongOption {
        OPTION_DISABLE_MAKEUP_GAIN_AUTO,
        OPTION_ENABLE_MAKEUP_GAIN_AUTO,
        OPTION_DISABLE_ALSA_OUTPUT,
-       OPTION_NO_FLUSH_PBOS
+       OPTION_NO_FLUSH_PBOS,
+       OPTION_PRINT_VIDEO_LATENCY
 };
 
 void usage()
@@ -98,6 +99,7 @@ void usage()
        fprintf(stderr, "      --no-flush-pbos             do not explicitly signal texture data uploads\n");
        fprintf(stderr, "                                    (will give display corruption, but makes it\n");
        fprintf(stderr, "                                    possible to run with apitrace in real time)\n");
+       fprintf(stderr, "      --print-video-latency       print out measurements of video latency on stdout\n");
 }
 
 void parse_flags(int argc, char * const argv[])
@@ -143,6 +145,7 @@ void parse_flags(int argc, char * const argv[])
                { "enable-makeup-gain-auto", no_argument, 0, OPTION_ENABLE_MAKEUP_GAIN_AUTO },
                { "disable-alsa-output", no_argument, 0, OPTION_DISABLE_ALSA_OUTPUT },
                { "no-flush-pbos", no_argument, 0, OPTION_NO_FLUSH_PBOS },
+               { "print-video-latency", no_argument, 0, OPTION_PRINT_VIDEO_LATENCY },
                { 0, 0, 0, 0 }
        };
        vector<string> theme_dirs;
@@ -292,6 +295,9 @@ void parse_flags(int argc, char * const argv[])
                case OPTION_NO_FLUSH_PBOS:
                        global_flags.flush_pbos = false;
                        break;
+               case OPTION_PRINT_VIDEO_LATENCY:
+                       global_flags.print_video_latency = true;
+                       break;
                case OPTION_HELP:
                        usage();
                        exit(0);
diff --git a/flags.h b/flags.h
index 5abc9530470cc0ac9b086c21661f22d2a1c75df6..71495816d0d22734f8560c2d1e38aa79389e3a20 100644 (file)
--- a/flags.h
+++ b/flags.h
@@ -40,6 +40,7 @@ struct Flags {
        bool multichannel_mapping_mode = false;  // Implicitly true if input_mapping_filename is nonempty.
        std::string input_mapping_filename;  // Empty for none.
        std::string midi_mapping_filename;  // Empty for none.
+       bool print_video_latency = false;
 };
 extern Flags global_flags;
 
index ff9c77f49f213fd80ef268a4590b2deac89e9528..0a7ecc21a070012e65a55c8e9e51f6f436641244 100644 (file)
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -511,6 +511,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                        new_frame.interlaced = video_format.interlaced;
                        new_frame.upload_func = upload_func;
                        new_frame.dropped_frames = dropped_frames;
+                       new_frame.received_timestamp = video_frame.received_timestamp;  // Ignore the audio timestamp.
                        card->new_frames.push(move(new_frame));
                        card->new_frames_changed.notify_all();
                }
diff --git a/mixer.h b/mixer.h
index f5aa4fbdd014758faa047f21bc4dfdf701467a81..551e21790d9f71daacd89d1fc6a740baca0723a1 100644 (file)
--- a/mixer.h
+++ b/mixer.h
@@ -336,6 +336,7 @@ private:
                        unsigned field;  // Which field (0 or 1) of the frame to use. Always 0 for progressive.
                        std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
                        unsigned dropped_frames = 0;  // Number of dropped frames before this one.
+                       std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
                };
                std::queue<NewFrame> new_frames;
                bool should_quit = false;
diff --git a/print_latency.cpp b/print_latency.cpp
new file mode 100644 (file)
index 0000000..0b0a1ae
--- /dev/null
@@ -0,0 +1,30 @@
+#include "print_latency.h"
+#include "flags.h"
+
+#include <stdio.h>
+#include <chrono>
+#include <string>
+
+using namespace std;
+using namespace std::chrono;
+
+void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno)
+{
+       if (received_ts.max_ts == steady_clock::time_point::min())
+               return;
+
+       // 101 is chosen so that it's prime, which is unlikely to get the same frame type every time.
+       if (global_flags.print_video_latency && (++*frameno % 101) == 0) {
+               const steady_clock::time_point now = steady_clock::now();
+               printf("%-60s %4.0f ms (lowest-latency input), %4.0f ms (highest-latency input)",
+                       header.c_str(),
+                       1e3 * std::chrono::duration<double>(now - received_ts.max_ts).count(),
+                       1e3 * std::chrono::duration<double>(now - received_ts.min_ts).count());
+
+               if (is_b_frame) {
+                       printf("  [on B-frame; potential extra latency]\n");
+               } else {
+                       printf("\n");
+               }
+       }
+}
diff --git a/print_latency.h b/print_latency.h
new file mode 100644 (file)
index 0000000..9d8a93b
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _PRINT_LATENCY_H
+#define _PRINT_LATENCY_H 1
+
+// A small utility function to print the latency between two end points
+// (typically when the frame was received from the video card, and some
+// point when the frame is ready to be output in some form).
+
+#include <chrono>
+#include <string>
+
+// Since every output frame is based on multiple input frames, we need
+// more than one start timestamp. For now, we keep just the smallest
+// and largest timestamp, so that we can print out a range.
+// For both of these, steady_clock::time_point::min() is used for “not set”.
+struct ReceivedTimestamps {
+       std::chrono::steady_clock::time_point min_ts, max_ts;
+};
+
+void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno);
+
+#endif  // !defined(_PRINT_LATENCY_H)
index 4e974223d15def72a9d348a2a4eb94039077d0e2..2ab1f10f7c5f50d73073276094e242c584ea3f48 100644 (file)
@@ -18,6 +18,7 @@
 #include <va/va_enc_h264.h>
 #include <va/va_x11.h>
 #include <algorithm>
+#include <chrono>
 #include <condition_variable>
 #include <cstddef>
 #include <cstdint>
@@ -47,11 +48,13 @@ extern "C" {
 #include "ffmpeg_raii.h"
 #include "flags.h"
 #include "mux.h"
+#include "print_latency.h"
 #include "ref_counted_frame.h"
 #include "timebase.h"
 #include "x264_encoder.h"
 
 using namespace std;
+using namespace std::chrono;
 using namespace std::placeholders;
 
 class QOpenGLContext;
@@ -142,6 +145,7 @@ public:
        struct Frame {
                int64_t pts, duration;
                uint8_t *data;
+               ReceivedTimestamps received_ts;
 
                // Invert to get the smallest pts first.
                bool operator< (const Frame &other) const { return pts > other.pts; }
@@ -151,7 +155,7 @@ public:
        // Does _not_ take ownership of data; a copy is taken if needed.
        // The returned pointer is valid until the next call to reorder_frame, or destruction.
        // As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
-       Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data);
+       Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts);
 
        // The same as reorder_frame, but without inserting anything. Used to empty the queue.
        Frame get_first_frame();
@@ -179,22 +183,22 @@ FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height)
        }
 }
 
-FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data)
+FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts)
 {
        if (queue_length == 0) {
-               return Frame{pts, duration, data};
+               return Frame{pts, duration, data, received_ts};
        }
 
        assert(!freelist.empty());
        uint8_t *storage = freelist.top();
        freelist.pop();
        memcpy(storage, data, width * height * 2);
-       frames.push(Frame{pts, duration, storage});
+       frames.push(Frame{pts, duration, storage, received_ts});
 
        if (frames.size() >= queue_length) {
                return get_first_frame();
        } else {
-               return Frame{-1, -1, nullptr};
+               return Frame{-1, -1, nullptr, steady_clock::time_point::min(), steady_clock::time_point::min()};
        }
 }
 
@@ -232,6 +236,7 @@ private:
                int frame_type;
                vector<float> audio;
                int64_t pts, dts, duration;
+               ReceivedTimestamps received_ts;
        };
        struct PendingFrame {
                RefCountedGLsync fence;
@@ -1630,6 +1635,10 @@ void QuickSyncEncoderImpl::save_codeddata(storage_task task)
        }
        vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
 
+       static int frameno = 0;
+       print_latency("Current QuickSync latency (video inputs → disk mux):",
+               task.received_ts, (task.frame_type == FRAME_B), &frameno);
+
        {
                // Add video.
                AVPacket pkt;
@@ -2034,7 +2043,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
                                add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
                        } else {
                                assert(global_flags.x264_video_to_http);
-                               x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+                               x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
                        }
                }
        }
@@ -2081,6 +2090,20 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        } while (sync_status == GL_TIMEOUT_EXPIRED);
        assert(sync_status != GL_WAIT_FAILED);
 
+       // Find min and max timestamp of all input frames that have a timestamp.
+       steady_clock::time_point min_ts = steady_clock::time_point::max(), max_ts = steady_clock::time_point::min();
+       for (const RefCountedFrame &input_frame : frame.input_frames) {
+               if (input_frame && input_frame->received_timestamp > steady_clock::time_point::min()) {
+                       min_ts = min(min_ts, input_frame->received_timestamp);
+                       max_ts = max(max_ts, input_frame->received_timestamp);
+               }
+       }
+       const ReceivedTimestamps received_ts{ min_ts, max_ts };
+
+       static int frameno = 0;
+       print_latency("Current mixer latency (video inputs → ready for encode):",
+               received_ts, (frame_type == FRAME_B), &frameno);
+
        // Release back any input frames we needed to render this frame.
        frame.input_frames.clear();
 
@@ -2109,13 +2132,13 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
                    global_flags.x264_video_to_http) {
                        // Add uncompressed video. (Note that pts == dts here.)
                        // Delay needs to match audio.
-                       FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr));
+                       FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr), received_ts);
                        if (output_frame.data != nullptr) {
                                if (global_flags.uncompressed_video_to_http) {
                                        add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
                                } else {
                                        assert(global_flags.x264_video_to_http);
-                                       x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+                                       x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
                                }
                        }
                }
@@ -2156,6 +2179,7 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
        tmp.pts = pts;
        tmp.dts = dts;
        tmp.duration = duration;
+       tmp.received_ts = received_ts;
        storage_task_enqueue(move(tmp));
 
        update_ReferenceFrames(frame_type);
index 95d3625354c23ac539cb42917befcb5145c42e98..fcd20e7aa519b33345a86b08728177bddf515b8b 100644 (file)
@@ -1,5 +1,6 @@
 #include "x264_encoder.h"
 
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -10,6 +11,7 @@
 #include "defs.h"
 #include "flags.h"
 #include "mux.h"
+#include "print_latency.h"
 #include "timebase.h"
 #include "x264_speed_control.h"
 
@@ -19,6 +21,7 @@ extern "C" {
 }
 
 using namespace std;
+using namespace std::chrono;
 
 namespace {
 
@@ -55,11 +58,12 @@ X264Encoder::~X264Encoder()
        encoder_thread.join();
 }
 
-void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data)
+void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts)
 {
        QueuedFrame qf;
        qf.pts = pts;
        qf.duration = duration;
+       qf.received_ts = received_ts;
 
        {
                lock_guard<mutex> lock(mu);
@@ -251,6 +255,8 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
                pic.opaque = reinterpret_cast<void *>(intptr_t(qf.duration));
 
                input_pic = &pic;
+
+               frames_being_encoded[qf.pts] = qf.received_ts;
        }
 
        // See if we have a new bitrate to change to.
@@ -280,6 +286,18 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 
        if (num_nal == 0) return;
 
+       if (frames_being_encoded.count(pic.i_pts)) {
+               ReceivedTimestamps received_ts = frames_being_encoded[pic.i_pts];
+               frames_being_encoded.erase(pic.i_pts);
+
+               static int frameno = 0;
+               print_latency("Current x264 latency (video inputs → network mux):",
+                       received_ts, (pic.i_type == X264_TYPE_B || pic.i_type == X264_TYPE_BREF),
+                       &frameno);
+       } else {
+               assert(false);
+       }
+
        // We really need one AVPacket for the entire frame, it seems,
        // so combine it all.
        size_t num_bytes = buffered_sei.size();
index f607a91df8ab4786f1d27a7df25910ceffe6795f..8adb42ae19102808b2f35d3395aeb3a83e50d9e0 100644 (file)
 #include <stdint.h>
 #include <x264.h>
 #include <atomic>
+#include <chrono>
 #include <condition_variable>
 #include <memory>
 #include <mutex>
 #include <queue>
 #include <string>
 #include <thread>
+#include <unordered_map>
 
 extern "C" {
 #include <libavformat/avformat.h>
 }
 
+#include "print_latency.h"
+
 class Mux;
 class X264SpeedControl;
 
@@ -47,7 +51,7 @@ public:
 
        // <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
        // Does not block.
-       void add_frame(int64_t pts, int64_t duration, const uint8_t *data);
+       void add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts);
 
        std::string get_global_headers() const {
                while (!x264_init_done) {
@@ -64,6 +68,7 @@ private:
        struct QueuedFrame {
                int64_t pts, duration;
                uint8_t *data;
+               ReceivedTimestamps received_ts;
        };
        void encoder_thread_func();
        void init_x264();
@@ -101,6 +106,9 @@ private:
 
        // Whenever the state of <queued_frames> changes.
        std::condition_variable queued_frames_nonempty;
+
+       // Key is the pts of the frame.
+       std::unordered_map<int64_t, ReceivedTimestamps> frames_being_encoded;
 };
 
 #endif  // !defined(_X264ENCODE_H)