X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder.cpp;h=67e9668faaecbac253e6d37877664651ba47a927;hb=fa54f2630c56a1df0046923d6a77b1bd58abf240;hp=8cf8cf3e2722a192e7ff2690fd12a03bee1d451f;hpb=85ff87cd9ccbee090fdf11ed6322f7ee9d65730c;p=nageru diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 8cf8cf3..67e9668 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -64,6 +64,18 @@ using namespace std::placeholders; class QOpenGLContext; class QSurface; +namespace { + +// These need to survive several QuickSyncEncoderImpl instances, +// so they are outside. +once_flag quick_sync_metrics_inited; +LatencyHistogram mixer_latency_histogram, qs_latency_histogram; +MuxMetrics current_file_mux_metrics, total_mux_metrics; +std::atomic metric_current_file_start_time_seconds{0.0 / 0.0}; +std::atomic metric_quick_sync_stalled_frames{0}; + +} // namespace + #define CHECK_VASTATUS(va_status, func) \ if (va_status != VA_STATUS_SUCCESS) { \ fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ @@ -106,14 +118,6 @@ static constexpr unsigned int MaxFrameNum = (2<<16); static constexpr unsigned int MaxPicOrderCntLsb = (2<<8); static constexpr unsigned int Log2MaxFrameNum = 16; static constexpr unsigned int Log2MaxPicOrderCntLsb = 8; -static constexpr int rc_default_modes[] = { // Priority list of modes. - VA_RC_VBR, - VA_RC_CQP, - VA_RC_VBR_CONSTRAINED, - VA_RC_CBR, - VA_RC_VCM, - VA_RC_NONE, -}; using namespace std; @@ -332,7 +336,7 @@ void QuickSyncEncoderImpl::sps_rbsp(YCbCrLumaCoefficients ycbcr_coefficients, bi bitstream_put_ui(bs, 1, 1); /* colour_description_present_flag */ { bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */ - bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */ + bitstream_put_ui(bs, 13, 8); /* transfer_characteristics (13 = sRGB) */ if (ycbcr_coefficients == YCBCR_REC_709) { bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */ } else { @@ -702,26 +706,6 @@ void encoding2display_order( } -static const char *rc_to_string(int rc_mode) -{ - switch (rc_mode) { - case VA_RC_NONE: - return "NONE"; - case VA_RC_CBR: - return "CBR"; - case VA_RC_VBR: - return "VBR"; - case VA_RC_VCM: - return "VCM"; - case VA_RC_CQP: - return "CQP"; - case VA_RC_VBR_CONSTRAINED: - return "VBR_CONSTRAINED"; - default: - return "Unknown"; - } -} - void QuickSyncEncoderImpl::enable_zerocopy_if_possible() { if (global_flags.x264_video_to_disk) { @@ -867,23 +851,13 @@ int QuickSyncEncoderImpl::init_va(const string &va_display) } if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) { - int tmp = attrib[VAConfigAttribRateControl].value; - - if (rc_mode == -1 || !(rc_mode & tmp)) { - if (rc_mode != -1) { - printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode)); - } - - for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) { - if (rc_default_modes[i] & tmp) { - rc_mode = rc_default_modes[i]; - break; - } - } + if (!(attrib[VAConfigAttribRateControl].value & VA_RC_CQP)) { + fprintf(stderr, "ERROR: VA-API encoder does not support CQP mode.\n"); + exit(1); } config_attrib[config_attrib_num].type = VAConfigAttribRateControl; - config_attrib[config_attrib_num].value = rc_mode; + config_attrib[config_attrib_num].value = VA_RC_CQP; config_attrib_num++; } @@ -1414,7 +1388,7 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task) static int frameno = 0; print_latency("Current Quick Sync latency (video inputs → disk mux):", - task.received_ts, (task.frame_type == FRAME_B), &frameno); + task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram); { // Add video. @@ -1561,6 +1535,15 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource memset(&slice_param, 0, sizeof(slice_param)); } + call_once(quick_sync_metrics_inited, [](){ + mixer_latency_histogram.init("mixer"); + qs_latency_histogram.init("quick_sync"); + current_file_mux_metrics.init({{ "destination", "current_file" }}); + total_mux_metrics.init({{ "destination", "files_total" }}); + global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE); + global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames); + }); + storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this); encode_thread = thread([this]{ @@ -1620,6 +1603,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC if (surf == nullptr) { fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n", current_storage_frame); + ++metric_quick_sync_stalled_frames; storage_task_queue_changed.wait(lock, [this, &surf]{ if (storage_thread_should_quit) return true; @@ -1672,8 +1656,8 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC // Create CbCr image. surf->cbcr_egl_image = EGL_NO_IMAGE_KHR; EGLint cbcr_attribs[] = { - EGL_WIDTH, frame_width, - EGL_HEIGHT, frame_height, + EGL_WIDTH, frame_width / 2, + EGL_HEIGHT, frame_height / 2, EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'), EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]), @@ -1697,6 +1681,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC void QuickSyncEncoderImpl::add_audio(int64_t pts, vector audio) { + lock_guard lock(file_audio_encoder_mutex); assert(!is_shutdown); file_audio_encoder->encode_audio(audio, pts + global_delay()); } @@ -1778,7 +1763,10 @@ void QuickSyncEncoderImpl::shutdown() storage_thread.join(); // Encode any leftover audio in the queues, and also any delayed frames. - file_audio_encoder->encode_last_audio(); + { + lock_guard lock(file_audio_encoder_mutex); + file_audio_encoder->encode_last_audio(); + } if (!global_flags.x264_video_to_disk) { release_encode(); @@ -1790,6 +1778,7 @@ void QuickSyncEncoderImpl::shutdown() void QuickSyncEncoderImpl::close_file() { file_mux.reset(); + metric_current_file_start_time_seconds = 0.0 / 0.0; } void QuickSyncEncoderImpl::open_output_file(const std::string &filename) @@ -1812,9 +1801,17 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename) video_extradata = x264_encoder->get_global_headers(); } - AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters(); - file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE, - std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1))); + current_file_mux_metrics.reset(); + + { + lock_guard lock(file_audio_encoder_mutex); + AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters(); + file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE, + std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1), + Mux::WRITE_BACKGROUND, + { ¤t_file_mux_metrics, &total_mux_metrics })); + } + metric_current_file_start_time_seconds = get_timestamp_for_metrics(); if (global_flags.x264_video_to_disk) { x264_encoder->add_mux(file_mux.get()); @@ -1954,15 +1951,19 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, // Wait for the GPU to be done with the frame. GLenum sync_status; do { - sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000); + sync_status = glClientWaitSync(frame.fence.get(), 0, 0); check_error(); + if (sync_status == GL_TIMEOUT_EXPIRED) { + // NVIDIA likes to busy-wait; yield instead. + this_thread::sleep_for(milliseconds(1)); + } } while (sync_status == GL_TIMEOUT_EXPIRED); assert(sync_status != GL_WAIT_FAILED); ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); static int frameno = 0; print_latency("Current mixer latency (video inputs → ready for encode):", - received_ts, false, &frameno); + received_ts, false, &frameno, &mixer_latency_histogram); // Release back any input frames we needed to render this frame. frame.input_frames.clear();