X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=quicksync_encoder.cpp;h=1b36aa5246fbda3b19b5e65a5abc48365ab23840;hb=2fa5654e76175e660dfd834df6e85040bba358eb;hp=635a95a5fb297e8a7fb9de7bca63ce9a73ab6d8d;hpb=7297850cec443cb0c02f82d7301a30583744627d;p=nageru diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 635a95a..1b36aa5 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -64,6 +64,18 @@ using namespace std::placeholders; class QOpenGLContext; class QSurface; +namespace { + +// These need to survive several QuickSyncEncoderImpl instances, +// so they are outside. +once_flag quick_sync_metrics_inited; +LatencyHistogram mixer_latency_histogram, qs_latency_histogram; +MuxMetrics current_file_mux_metrics, total_mux_metrics; +std::atomic metric_current_file_start_time_seconds{0.0 / 0.0}; +std::atomic metric_quick_sync_stalled_frames{0}; + +} // namespace + #define CHECK_VASTATUS(va_status, func) \ if (va_status != VA_STATUS_SUCCESS) { \ fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ @@ -736,6 +748,7 @@ void QuickSyncEncoderImpl::enable_zerocopy_if_possible() } else { use_zerocopy = true; } + global_flags.use_zerocopy = use_zerocopy; } VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display) @@ -994,16 +1007,7 @@ int QuickSyncEncoderImpl::setup_encode() gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1); gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1); } else { - size_t bytes_per_pixel; - if (global_flags.x264_bit_depth > 8) { - bytes_per_pixel = 2; - gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R16, frame_width, frame_height); - gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG16, frame_width / 2, frame_height / 2); - } else { - bytes_per_pixel = 1; - gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height); - gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 
frame_width / 2, frame_height / 2); - } + size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1; // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API // buffers, due to potentially differing pitch. @@ -1421,8 +1425,8 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task) vaUnmapBuffer(va_dpy, surf->coded_buf); static int frameno = 0; - print_latency("Current QuickSync latency (video inputs → disk mux):", - task.received_ts, (task.frame_type == FRAME_B), &frameno); + print_latency("Current Quick Sync latency (video inputs → disk mux):", + task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram); { // Add video. @@ -1516,14 +1520,15 @@ void QuickSyncEncoderImpl::release_gl_resources() } for (unsigned i = 0; i < SURFACE_NUM; i++) { - if (!use_zerocopy) { + if (use_zerocopy) { + resource_pool->release_2d_texture(gl_surfaces[i].y_tex); + resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex); + } else { glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo); glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glDeleteBuffers(1, &gl_surfaces[i].pbo); } - resource_pool->release_2d_texture(gl_surfaces[i].y_tex); - resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex); } has_released_gl_resources = true; @@ -1568,6 +1573,15 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource memset(&slice_param, 0, sizeof(slice_param)); } + call_once(quick_sync_metrics_inited, [](){ + mixer_latency_histogram.init("mixer"); + qs_latency_histogram.init("quick_sync"); + current_file_mux_metrics.init({{ "destination", "current_file" }}); + total_mux_metrics.init({{ "destination", "files_total" }}); + global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE); + global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames); + }); + storage_thread = 
thread(&QuickSyncEncoderImpl::storage_task_thread, this); encode_thread = thread([this]{ @@ -1611,6 +1625,11 @@ void QuickSyncEncoderImpl::release_gl_surface(size_t display_frame_num) } } +bool QuickSyncEncoderImpl::is_zerocopy() const +{ + return use_zerocopy; +} + bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) { assert(!is_shutdown); @@ -1622,6 +1641,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC if (surf == nullptr) { fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n", current_storage_frame); + ++metric_quick_sync_stalled_frames; storage_task_queue_changed.wait(lock, [this, &surf]{ if (storage_thread_should_quit) return true; @@ -1634,8 +1654,13 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC surface_for_frame[current_storage_frame] = surf; } - *y_tex = surf->y_tex; - *cbcr_tex = surf->cbcr_tex; + if (use_zerocopy) { + *y_tex = surf->y_tex; + *cbcr_tex = surf->cbcr_tex; + } else { + surf->y_tex = *y_tex; + surf->cbcr_tex = *cbcr_tex; + } if (!global_flags.x264_video_to_disk) { VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image); @@ -1727,6 +1752,9 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame() glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset)); check_error(); + // We don't own these; the caller does. 
+ surf->y_tex = surf->cbcr_tex = 0; + glBindTexture(GL_TEXTURE_2D, 0); check_error(); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -1784,6 +1812,7 @@ void QuickSyncEncoderImpl::shutdown() void QuickSyncEncoderImpl::close_file() { file_mux.reset(); + metric_current_file_start_time_seconds = 0.0 / 0.0; } void QuickSyncEncoderImpl::open_output_file(const std::string &filename) @@ -1806,9 +1835,13 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename) video_extradata = x264_encoder->get_global_headers(); } + current_file_mux_metrics.reset(); + AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters(); file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE, - std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1))); + std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1), + { &current_file_mux_metrics, &total_mux_metrics })); + metric_current_file_start_time_seconds = get_timestamp_for_metrics(); if (global_flags.x264_video_to_disk) { x264_encoder->add_mux(file_mux.get()); @@ -1956,7 +1989,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); static int frameno = 0; print_latency("Current mixer latency (video inputs → ready for encode):", - received_ts, false, &frameno); + received_ts, false, &frameno, &mixer_latency_histogram); // Release back any input frames we needed to render this frame. 
frame.input_frames.clear(); @@ -2080,6 +2113,11 @@ void QuickSyncEncoder::add_audio(int64_t pts, vector audio) impl->add_audio(pts, audio); } +bool QuickSyncEncoder::is_zerocopy() const +{ + return impl->is_zerocopy(); +} + bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) { return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);