class QOpenGLContext;
class QSurface;
+namespace {
+
+// These need to survive several QuickSyncEncoderImpl instances,
+// so they are outside.
+// Guards one-time registration of the metrics below (done via call_once()
+// when an encoder instance is constructed).
+once_flag quick_sync_metrics_inited;
+// Latency histograms: one for the mixer stage, one for the Quick Sync encode stage.
+LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
+// Mux statistics for the currently open file, and accumulated over all files.
+MuxMetrics current_file_mux_metrics, total_mux_metrics;
+// Initialized to NaN (0.0 / 0.0), which marks "no file currently open".
+std::atomic<double> metric_current_file_start_time_seconds{0.0 / 0.0};
+// Counts frames where rendering had to wait for a free encoder surface slot.
+std::atomic<int64_t> metric_quick_sync_stalled_frames{0};
+
+} // namespace
+
// Abort the process with a diagnostic if a VA-API call did not succeed.
#define CHECK_VASTATUS(va_status, func) \
if (va_status != VA_STATUS_SUCCESS) { \
fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
exit(1); \
}
+// Undefine first in case some other header already brought in its own version.
+#undef BUFFER_OFFSET
// Turns a byte offset into the pointer form that GL readback calls expect
// when a pixel-pack buffer (PBO) is bound (see the glGetTexImage() use below).
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
//#include "loadsurface.h"
// NOTE(review): (2<<8) is 512 = 2^9, while Log2MaxPicOrderCntLsb below is 8
// (2^8 = 256) -- confirm this off-by-one-power is intentional.
static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
static constexpr unsigned int Log2MaxFrameNum = 16;
static constexpr unsigned int Log2MaxPicOrderCntLsb = 8;
// Removed: the rate-control fallback priority list is obsolete now that
// CQP is required unconditionally (see the rate-control setup in init_va()).
-static constexpr int rc_default_modes[] = { // Priority list of modes.
- VA_RC_VBR,
- VA_RC_CQP,
- VA_RC_VBR_CONSTRAINED,
- VA_RC_CBR,
- VA_RC_VCM,
- VA_RC_NONE,
-};
using namespace std;
bitstream_put_ui(bs, 1, 1); /* colour_description_present_flag */
{
bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */
// Changed: transfer_characteristics is now signalled explicitly as sRGB
// (value 13) instead of "unspecified" (value 2).
- bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */
+ bitstream_put_ui(bs, 13, 8); /* transfer_characteristics (13 = sRGB) */
if (ycbcr_coefficients == YCBCR_REC_709) {
bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */
} else {
}
// Deleted: rc_to_string() was only used by the removed rate-control
// fallback logic; the encoder now always runs in CQP mode.
-static const char *rc_to_string(int rc_mode)
-{
- switch (rc_mode) {
- case VA_RC_NONE:
- return "NONE";
- case VA_RC_CBR:
- return "CBR";
- case VA_RC_VBR:
- return "VBR";
- case VA_RC_VCM:
- return "VCM";
- case VA_RC_CQP:
- return "CQP";
- case VA_RC_VBR_CONSTRAINED:
- return "VBR_CONSTRAINED";
- default:
- return "Unknown";
- }
-}
-
// Decide whether frames can be handed to the encoder without a CPU readback
// ("zerocopy"). NOTE(review): the x264_video_to_disk branch body is elided
// in this diff; only the else branch (enabling zerocopy) is visible.
void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
{
if (global_flags.x264_video_to_disk) {
} else {
use_zerocopy = true;
}
+ // New: publish the decision globally so other components can query it.
+ global_flags.use_zerocopy = use_zerocopy;
}
VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
int QuickSyncEncoderImpl::init_va(const string &va_display)
{
// Changed: plain Baseline is no longer probed; High, Main and
// Constrained Baseline remain.
- VAProfile profile_list[]={VAProfileH264High, VAProfileH264Main, VAProfileH264Baseline, VAProfileH264ConstrainedBaseline};
+ VAProfile profile_list[]={VAProfileH264High, VAProfileH264Main, VAProfileH264ConstrainedBaseline};
VAEntrypoint *entrypoints;
int num_entrypoints, slice_entrypoint;
int support_encode = 0;
exit(1);
} else {
switch (h264_profile) {
// Deleted: the plain Baseline case; Constrained Baseline below covers
// the relevant constraint flags.
- case VAProfileH264Baseline:
- ip_period = 1;
- constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
- h264_entropy_mode = 0;
- break;
case VAProfileH264ConstrainedBaseline:
constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */
ip_period = 1;
constraint_set_flag |= (1 << 3); /* Annex A.2.4 */
break;
default:
// Changed: the fallback profile is now Constrained Baseline instead of
// plain Baseline, matching the probe list above.
- h264_profile = VAProfileH264Baseline;
+ h264_profile = VAProfileH264ConstrainedBaseline;
ip_period = 1;
constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
break;
}
if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
// Changed: instead of choosing a supported rate-control mode from a
// fallback priority list, CQP is now required; abort if the driver
// cannot do it.
- int tmp = attrib[VAConfigAttribRateControl].value;
-
- if (rc_mode == -1 || !(rc_mode & tmp)) {
- if (rc_mode != -1) {
- printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode));
- }
-
- for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) {
- if (rc_default_modes[i] & tmp) {
- rc_mode = rc_default_modes[i];
- break;
- }
- }
+ if (!(attrib[VAConfigAttribRateControl].value & VA_RC_CQP)) {
+ fprintf(stderr, "ERROR: VA-API encoder does not support CQP mode.\n");
+ exit(1);
}
config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
- config_attrib[config_attrib_num].value = rc_mode;
+ config_attrib[config_attrib_num].value = VA_RC_CQP;
config_attrib_num++;
}
gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
} else {
// Changed: in the non-zerocopy path, the full-size textures are no longer
// created here -- begin_frame() now records them from the caller instead
// (see the surf->y_tex = *y_tex assignment later in this diff). Only the
// bytes-per-pixel choice for PBO sizing remains.
- size_t bytes_per_pixel;
- if (global_flags.x264_bit_depth > 8) {
- bytes_per_pixel = 2;
- gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R16, frame_width, frame_height);
- gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG16, frame_width / 2, frame_height / 2);
- } else {
- bytes_per_pixel = 1;
- gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
- gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
- }
+ size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1;
// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
// buffers, due to potentially differing pitch.
vaUnmapBuffer(va_dpy, surf->coded_buf);
static int frameno = 0;
// Changed: the latency printout now also feeds the qs_latency_histogram
// metric registered at construction time.
-print_latency("Current QuickSync latency (video inputs → disk mux):",
-task.received_ts, (task.frame_type == FRAME_B), &frameno);
+print_latency("Current Quick Sync latency (video inputs → disk mux):",
+task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram);
{
// Add video.
}
for (unsigned i = 0; i < SURFACE_NUM; i++) {
// Changed: only in the zerocopy path do the textures belong to us (via the
// resource pool). Without zerocopy they are caller-owned (see the
// "We don't own these" note at readback), so we only tear down the PBO.
- if (!use_zerocopy) {
+ if (use_zerocopy) {
+ resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
+ resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
+ } else {
glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glDeleteBuffers(1, &gl_surfaces[i].pbo);
}
- resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
- resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
}
has_released_gl_resources = true;
memset(&slice_param, 0, sizeof(slice_param));
}
+ // New: register the process-wide metrics exactly once, no matter how many
+ // QuickSyncEncoderImpl instances get created over the program's lifetime.
+ call_once(quick_sync_metrics_inited, [](){
+ mixer_latency_histogram.init("mixer");
+ qs_latency_histogram.init("quick_sync");
+ current_file_mux_metrics.init({{ "destination", "current_file" }});
+ total_mux_metrics.init({{ "destination", "files_total" }});
+ global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE);
+ global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames);
+ });
+
storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
encode_thread = thread([this]{
}
}
+// New accessor: reports whether zerocopy encoding is in use
+// (decided in enable_zerocopy_if_possible()).
+bool QuickSyncEncoderImpl::is_zerocopy() const
+{
+ return use_zerocopy;
+}
+
bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
{
assert(!is_shutdown);
if (surf == nullptr) {
fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n",
current_storage_frame);
+ // New: count the stall in the quick_sync_stalled_frames metric.
+ ++metric_quick_sync_stalled_frames;
storage_task_queue_changed.wait(lock, [this, &surf]{
if (storage_thread_should_quit)
return true;
surface_for_frame[current_storage_frame] = surf;
}
// Changed: texture handoff direction now depends on zerocopy mode.
// Zerocopy: we own the textures and give them to the caller to render into.
// Non-zerocopy: the caller owns them; we record the handles for readback.
- *y_tex = surf->y_tex;
- *cbcr_tex = surf->cbcr_tex;
+ if (use_zerocopy) {
+ *y_tex = surf->y_tex;
+ *cbcr_tex = surf->cbcr_tex;
+ } else {
+ surf->y_tex = *y_tex;
+ surf->cbcr_tex = *cbcr_tex;
+ }
if (!global_flags.x264_video_to_disk) {
VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
// Create CbCr image.
surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
EGLint cbcr_attribs[] = {
// Changed: the interleaved CbCr plane of 4:2:0 data is half size in both
// dimensions, so the GR88 EGL image must be created at half width/height.
- EGL_WIDTH, frame_width,
- EGL_HEIGHT, frame_height,
+ EGL_WIDTH, frame_width / 2,
+ EGL_HEIGHT, frame_height / 2,
EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
// Queue audio samples for encoding at the given pts.
void QuickSyncEncoderImpl::add_audio(int64_t pts, vector<float> audio)
{
+ // New: the audio encoder is now also touched from shutdown/open-file
+ // paths, so serialize all access to it.
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
assert(!is_shutdown);
file_audio_encoder->encode_audio(audio, pts + global_delay());
}
glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset));
check_error();
+ // New: clear the borrowed handles after readback, so release_gl_resources()
+ // never tries to free textures the caller owns.
+ // We don't own these; the caller does.
+ surf->y_tex = surf->cbcr_tex = 0;
+
glBindTexture(GL_TEXTURE_2D, 0);
check_error();
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
storage_thread.join();
// Encode any leftover audio in the queues, and also any delayed frames.
// Changed: encode_last_audio() now runs under file_audio_encoder_mutex,
// consistent with the locking added in add_audio().
-file_audio_encoder->encode_last_audio();
+{
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
+ file_audio_encoder->encode_last_audio();
+}
if (!global_flags.x264_video_to_disk) {
release_encode();
// Finish and close the current output file.
void QuickSyncEncoderImpl::close_file()
{
file_mux.reset();
+ // New: reset the start-time gauge to NaN, marking "no file open".
+ metric_current_file_start_time_seconds = 0.0 / 0.0;
}
// Open a new output file and set up its mux.
void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
video_extradata = x264_encoder->get_global_headers();
}
// Changed: per-file mux statistics restart from zero for each file, the
// audio codec parameters are fetched under the audio-encoder mutex, and the
// Mux is created with background writes plus both per-file and total metrics.
- AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
- file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
- std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+ current_file_mux_metrics.reset();
+
+ {
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
+ AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
+ file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+ std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+ Mux::WRITE_BACKGROUND,
+ { &current_file_mux_metrics, &total_mux_metrics }));
+ }
+ // New: record when this file started, for the start-time gauge.
+ metric_current_file_start_time_seconds = get_timestamp_for_metrics();
if (global_flags.x264_video_to_disk) {
x264_encoder->add_mux(file_mux.get());
if (frame_type == FRAME_IDR) {
// Release any reference frames from the previous GOP.
// Changed: the surfaces are now released under storage_task_queue_mutex --
// presumably because release_gl_surface() touches state shared with the
// storage thread; confirm against the rest of the file.
- for (const ReferenceFrame &frame : reference_frames) {
- release_gl_surface(frame.display_number);
+ {
+ unique_lock<mutex> lock(storage_task_queue_mutex);
+ for (const ReferenceFrame &frame : reference_frames) {
+ release_gl_surface(frame.display_number);
+ }
}
reference_frames.clear();
current_ref_frame_num = 0;
// Wait for the GPU to be done with the frame.
GLenum sync_status;
do {
// Changed: poll with a zero timeout and sleep 1 ms between polls, instead
// of a single one-second blocking wait inside the driver.
- sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+ sync_status = glClientWaitSync(frame.fence.get(), 0, 0);
check_error();
+ if (sync_status == GL_TIMEOUT_EXPIRED) {
+ // NVIDIA likes to busy-wait; yield instead.
+ this_thread::sleep_for(milliseconds(1));
+ }
} while (sync_status == GL_TIMEOUT_EXPIRED);
assert(sync_status != GL_WAIT_FAILED);
ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
static int frameno = 0;
// Changed: the mixer latency printout now also feeds mixer_latency_histogram.
print_latency("Current mixer latency (video inputs → ready for encode):",
- received_ts, false, &frameno);
+ received_ts, false, &frameno, &mixer_latency_histogram);
// Release back any input frames we needed to render this frame.
frame.input_frames.clear();
impl->add_audio(pts, audio);
}
+// New public wrapper: forwards to the pimpl implementation.
+bool QuickSyncEncoder::is_zerocopy() const
+{
+ return impl->is_zerocopy();
+}
+
bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
{
return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);