# Mixer objects
AUDIO_MIXER_OBJS = audio_mixer.o alsa_input.o alsa_pool.o ebu_r128_proc.o stereocompressor.o resampling_queue.o flags.o correlation_measurer.o filter.o input_mapping.o state.pb.o
-OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o $(AUDIO_MIXER_OBJS)
+OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o print_latency.o $(AUDIO_MIXER_OBJS)
# Streaming and encoding objects
OBJS += quicksync_encoder.o x264_encoder.o x264_speed_control.o video_encoder.o metacube2.o mux.o audio_encoder.o ffmpeg_raii.o
libmovit-dev libegl1-mesa-dev libasound2-dev libx264-dev libbmusb-dev \
protobuf-compiler libprotobuf-dev libpci-dev
-Exceptions as of November 2016:
+Exceptions as of December 2016:
- - libusb 1.0.21 is not yet in stretch; you need to fetch it from sid.
- Same with a bmusb version built against it.
+ - bmusb 0.5.3 is not yet in sid.
The patches/ directory contains a patch that helps zita-resampler performance.
-Subproject commit a765e066b74ac52ff0abf239d430d6f8d83f792e
+Subproject commit 227166d4398263e1191bdfe244f62bc38aaf4c1f
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <chrono>
#include <cstdint>
#include <utility>
#include <vector>
#define FRAME_SIZE (8 << 20) // 8 MB.
using namespace std;
+using namespace std::chrono;
using namespace std::placeholders;
using namespace bmusb;
video_format.width = width;
video_format.height = height;
+
+ current_video_frame.received_timestamp = steady_clock::now();
}
}
audio_format.bits_per_sample = 32;
audio_format.num_channels = 2;
+
+ current_audio_frame.received_timestamp = steady_clock::now();
}
}
OPTION_DISABLE_MAKEUP_GAIN_AUTO,
OPTION_ENABLE_MAKEUP_GAIN_AUTO,
OPTION_DISABLE_ALSA_OUTPUT,
- OPTION_NO_FLUSH_PBOS
+ OPTION_NO_FLUSH_PBOS,
+ OPTION_PRINT_VIDEO_LATENCY
};
void usage()
fprintf(stderr, " --no-flush-pbos do not explicitly signal texture data uploads\n");
fprintf(stderr, " (will give display corruption, but makes it\n");
fprintf(stderr, " possible to run with apitrace in real time)\n");
+ fprintf(stderr, " --print-video-latency print out measurements of video latency on stdout\n");
}
void parse_flags(int argc, char * const argv[])
{ "enable-makeup-gain-auto", no_argument, 0, OPTION_ENABLE_MAKEUP_GAIN_AUTO },
{ "disable-alsa-output", no_argument, 0, OPTION_DISABLE_ALSA_OUTPUT },
{ "no-flush-pbos", no_argument, 0, OPTION_NO_FLUSH_PBOS },
+ { "print-video-latency", no_argument, 0, OPTION_PRINT_VIDEO_LATENCY },
{ 0, 0, 0, 0 }
};
vector<string> theme_dirs;
case OPTION_NO_FLUSH_PBOS:
global_flags.flush_pbos = false;
break;
+ case OPTION_PRINT_VIDEO_LATENCY:
+ global_flags.print_video_latency = true;
+ break;
case OPTION_HELP:
usage();
exit(0);
bool multichannel_mapping_mode = false; // Implicitly true if input_mapping_filename is nonempty.
std::string input_mapping_filename; // Empty for none.
std::string midi_mapping_filename; // Empty for none.
+ bool print_video_latency = false;
};
extern Flags global_flags;
new_frame.interlaced = video_format.interlaced;
new_frame.upload_func = upload_func;
new_frame.dropped_frames = dropped_frames;
+ new_frame.received_timestamp = video_frame.received_timestamp; // Ignore the audio timestamp.
card->new_frames.push(move(new_frame));
card->new_frames_changed.notify_all();
}
unsigned field; // Which field (0 or 1) of the frame to use. Always 0 for progressive.
std::function<void()> upload_func; // Needs to be called to actually upload the texture to OpenGL.
unsigned dropped_frames = 0; // Number of dropped frames before this one.
+ std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
};
std::queue<NewFrame> new_frames;
bool should_quit = false;
--- /dev/null
+#include "print_latency.h"
+#include "flags.h"
+
+#include <stdio.h>
+#include <chrono>
+#include <string>
+
+using namespace std;
+using namespace std::chrono;
+
+// Prints the latency from when the input frame(s) behind an output frame
+// were received until now, prefixed by <header>. Rate-limited: prints only
+// about every 101st call (counted in *frameno), and only when the
+// --print-video-latency flag is set. If <is_b_frame>, a note is appended,
+// since B-frames imply reordering and thus potential extra latency.
+void print_latency(const string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno)
+{
+	// max_ts == min() is the “not set” sentinel; no input had a timestamp.
+	if (received_ts.max_ts == steady_clock::time_point::min())
+		return;
+
+	// 101 is chosen because it is prime, which makes it unlikely that we
+	// hit the same frame type (e.g. always a B-frame) every time we print.
+	if (global_flags.print_video_latency && (++*frameno % 101) == 0) {
+		const steady_clock::time_point now = steady_clock::now();
+		// The most recently received input (max_ts) has the lowest
+		// latency; the oldest one (min_ts) the highest.
+		printf("%-60s %4.0f ms (lowest-latency input), %4.0f ms (highest-latency input)",
+			header.c_str(),
+			1e3 * std::chrono::duration<double>(now - received_ts.max_ts).count(),
+			1e3 * std::chrono::duration<double>(now - received_ts.min_ts).count());
+
+		if (is_b_frame) {
+			printf(" [on B-frame; potential extra latency]\n");
+		} else {
+			printf("\n");
+		}
+	}
+}
--- /dev/null
+#ifndef _PRINT_LATENCY_H
+#define _PRINT_LATENCY_H 1
+
+// A small utility function to print the latency between two points in time
+// (typically between when a frame was received from the video card, and
+// the point when the frame is ready to be output in some form).
+
+#include <chrono>
+#include <string>
+
+// Since every output frame is based on multiple input frames, we need
+// more than one start timestamp. For now, we keep just the smallest
+// and largest timestamp, so that we can print out a range.
+// For both of these, steady_clock::time_point::min() is used for “not set”.
+struct ReceivedTimestamps {
+	std::chrono::steady_clock::time_point min_ts, max_ts;
+};
+
+// Prints the elapsed time from <received_ts> until now, prefixed by <header>.
+// Rate-limited: prints roughly every 101st call (counted in *frameno), and
+// only if the --print-video-latency flag is set. If <is_b_frame>, a note
+// about potential extra (reordering) latency is appended to the line.
+void print_latency(const std::string &header, const ReceivedTimestamps &received_ts, bool is_b_frame, int *frameno);
+
+#endif // !defined(_PRINT_LATENCY_H)
#include <va/va_enc_h264.h>
#include <va/va_x11.h>
#include <algorithm>
+#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include "ffmpeg_raii.h"
#include "flags.h"
#include "mux.h"
+#include "print_latency.h"
#include "ref_counted_frame.h"
#include "timebase.h"
#include "x264_encoder.h"
using namespace std;
+using namespace std::chrono;
using namespace std::placeholders;
class QOpenGLContext;
struct Frame {
int64_t pts, duration;
uint8_t *data;
+ ReceivedTimestamps received_ts;
// Invert to get the smallest pts first.
bool operator< (const Frame &other) const { return pts > other.pts; }
// Does _not_ take ownership of data; a copy is taken if needed.
// The returned pointer is valid until the next call to reorder_frame, or destruction.
// As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
- Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data);
+ Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts);
// The same as reorder_frame, but without inserting anything. Used to empty the queue.
Frame get_first_frame();
}
}
-FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data)
+FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data, const ReceivedTimestamps &received_ts)
{
	if (queue_length == 0) {
-		return Frame{pts, duration, data};
+		// No reordering wanted; pass the frame (and its timestamps) straight through.
+		return Frame{pts, duration, data, received_ts};
	}
	assert(!freelist.empty());
	uint8_t *storage = freelist.top();
	freelist.pop();
	memcpy(storage, data, width * height * 2);
-	frames.push(Frame{pts, duration, storage});
+	frames.push(Frame{pts, duration, storage, received_ts});
	if (frames.size() >= queue_length) {
		return get_first_frame();
	} else {
-		return Frame{-1, -1, nullptr};
+		// Queue not full yet; return a sentinel frame (null data,
+		// unset timestamps — brace elision initializes the nested
+		// ReceivedTimestamps) to signal that nothing is ready.
+		return Frame{-1, -1, nullptr, steady_clock::time_point::min(), steady_clock::time_point::min()};
	}
}
int frame_type;
vector<float> audio;
int64_t pts, dts, duration;
+ ReceivedTimestamps received_ts;
};
struct PendingFrame {
RefCountedGLsync fence;
}
vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
+ static int frameno = 0;
+ print_latency("Current QuickSync latency (video inputs → disk mux):",
+ task.received_ts, (task.frame_type == FRAME_B), &frameno);
+
{
// Add video.
AVPacket pkt;
add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
} else {
assert(global_flags.x264_video_to_http);
- x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+ x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
}
}
}
} while (sync_status == GL_TIMEOUT_EXPIRED);
assert(sync_status != GL_WAIT_FAILED);
+ // Find min and max timestamp of all input frames that have a timestamp.
+ steady_clock::time_point min_ts = steady_clock::time_point::max(), max_ts = steady_clock::time_point::min();
+ for (const RefCountedFrame &input_frame : frame.input_frames) {
+ if (input_frame && input_frame->received_timestamp > steady_clock::time_point::min()) {
+ min_ts = min(min_ts, input_frame->received_timestamp);
+ max_ts = max(max_ts, input_frame->received_timestamp);
+ }
+ }
+ const ReceivedTimestamps received_ts{ min_ts, max_ts };
+
+ static int frameno = 0;
+ print_latency("Current mixer latency (video inputs → ready for encode):",
+ received_ts, (frame_type == FRAME_B), &frameno);
+
// Release back any input frames we needed to render this frame.
frame.input_frames.clear();
global_flags.x264_video_to_http) {
// Add uncompressed video. (Note that pts == dts here.)
// Delay needs to match audio.
- FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr));
+ FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr), received_ts);
if (output_frame.data != nullptr) {
if (global_flags.uncompressed_video_to_http) {
add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
} else {
assert(global_flags.x264_video_to_http);
- x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
+ x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data, output_frame.received_ts);
}
}
}
tmp.pts = pts;
tmp.dts = dts;
tmp.duration = duration;
+ tmp.received_ts = received_ts;
storage_task_enqueue(move(tmp));
update_ReferenceFrames(frame_type);
#include "x264_encoder.h"
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "defs.h"
#include "flags.h"
#include "mux.h"
+#include "print_latency.h"
#include "timebase.h"
#include "x264_speed_control.h"
}
using namespace std;
+using namespace std::chrono;
namespace {
encoder_thread.join();
}
-void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data)
+void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts)
{
QueuedFrame qf;
qf.pts = pts;
qf.duration = duration;
+ qf.received_ts = received_ts;
{
lock_guard<mutex> lock(mu);
pic.opaque = reinterpret_cast<void *>(intptr_t(qf.duration));
input_pic = &pic;
+
+ frames_being_encoded[qf.pts] = qf.received_ts;
}
// See if we have a new bitrate to change to.
if (num_nal == 0) return;
+ if (frames_being_encoded.count(pic.i_pts)) {
+ ReceivedTimestamps received_ts = frames_being_encoded[pic.i_pts];
+ frames_being_encoded.erase(pic.i_pts);
+
+ static int frameno = 0;
+ print_latency("Current x264 latency (video inputs → network mux):",
+ received_ts, (pic.i_type == X264_TYPE_B || pic.i_type == X264_TYPE_BREF),
+ &frameno);
+ } else {
+ assert(false);
+ }
+
// We really need one AVPacket for the entire frame, it seems,
// so combine it all.
size_t num_bytes = buffered_sei.size();
#include <stdint.h>
#include <x264.h>
#include <atomic>
+#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
+#include <unordered_map>
extern "C" {
#include <libavformat/avformat.h>
}
+#include "print_latency.h"
+
class Mux;
class X264SpeedControl;
// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
// Does not block.
- void add_frame(int64_t pts, int64_t duration, const uint8_t *data);
+ void add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts);
std::string get_global_headers() const {
while (!x264_init_done) {
struct QueuedFrame {
int64_t pts, duration;
uint8_t *data;
+ ReceivedTimestamps received_ts;
};
void encoder_thread_func();
void init_x264();
// Whenever the state of <queued_frames> changes.
std::condition_variable queued_frames_nonempty;
+
+ // Key is the pts of the frame.
+ std::unordered_map<int64_t, ReceivedTimestamps> frames_being_encoded;
};
#endif // !defined(_X264ENCODE_H)