Fix a lock that was held too long in an edge case.

diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 635a95a5fb297e8a7fb9de7bca63ce9a73ab6d8d..1b36aa5246fbda3b19b5e65a5abc48365ab23840 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -64,6 +64,18 @@ using namespace std::placeholders;
 class QOpenGLContext;
 class QSurface;
 
+namespace {
+
+// These need to survive several QuickSyncEncoderImpl instances,
+// so they live outside the class, at file scope.
+once_flag quick_sync_metrics_inited;
+LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
+MuxMetrics current_file_mux_metrics, total_mux_metrics;
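+// A value of NaN (0.0 / 0.0) means that no file is currently open; close_file() resets it.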
+std::atomic<double> metric_current_file_start_time_seconds{0.0 / 0.0};
+std::atomic<int64_t> metric_quick_sync_stalled_frames{0};
+
+}  // namespace
+
 #define CHECK_VASTATUS(va_status, func)                                 \
     if (va_status != VA_STATUS_SUCCESS) {                               \
         fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
@@ -736,6 +748,7 @@ void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
        } else {
                use_zerocopy = true;
        }
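+       // Publish the final decision so that other code can check global_flags.use_zerocopy.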
+       global_flags.use_zerocopy = use_zerocopy;
 }
 
 VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
@@ -994,16 +1007,7 @@ int QuickSyncEncoderImpl::setup_encode()
                        gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
                        gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
                } else {
-                       size_t bytes_per_pixel;
-                       if (global_flags.x264_bit_depth > 8) {
-                               bytes_per_pixel = 2;
-                               gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R16, frame_width, frame_height);
-                               gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG16, frame_width / 2, frame_height / 2);
-                       } else {
-                               bytes_per_pixel = 1;
-                               gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-                               gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-                       }
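+                       // In the non-zero-copy path, the textures are supplied by the caller in begin_frame();
+                       // we only need the pixel size to allocate the readback PBO below.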
+                       size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1;
 
                        // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
                        // buffers, due to potentially differing pitch.
@@ -1421,8 +1425,8 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task)
        vaUnmapBuffer(va_dpy, surf->coded_buf);
 
        static int frameno = 0;
-       print_latency("Current QuickSync latency (video inputs → disk mux):",
-               task.received_ts, (task.frame_type == FRAME_B), &frameno);
+       print_latency("Current Quick Sync latency (video inputs → disk mux):",
+               task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram);
 
        {
                // Add video.
@@ -1516,14 +1520,15 @@ void QuickSyncEncoderImpl::release_gl_resources()
        }
 
        for (unsigned i = 0; i < SURFACE_NUM; i++) {
-               if (!use_zerocopy) {
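+               // Only the zero-copy path owns its textures; in the readback path they belong
+               // to the caller (end_frame() zeroes them out), so just clean up the PBO.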
+               if (use_zerocopy) {
+                       resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
+                       resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
+               } else {
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
                        glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
                        glDeleteBuffers(1, &gl_surfaces[i].pbo);
                }
-               resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
-               resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
        }
 
        has_released_gl_resources = true;
@@ -1568,6 +1573,15 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
                memset(&slice_param, 0, sizeof(slice_param));
        }
 
+       call_once(quick_sync_metrics_inited, [](){
+               mixer_latency_histogram.init("mixer");
+               qs_latency_histogram.init("quick_sync");
+               current_file_mux_metrics.init({{ "destination", "current_file" }});
+               total_mux_metrics.init({{ "destination", "files_total" }});
+               global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE);
+               global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames);
+       });
+
        storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
        encode_thread = thread([this]{
@@ -1611,6 +1625,11 @@ void QuickSyncEncoderImpl::release_gl_surface(size_t display_frame_num)
        }
 }
 
+bool QuickSyncEncoderImpl::is_zerocopy() const
+{
+       return use_zerocopy;
+}
+
 bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
        assert(!is_shutdown);
@@ -1622,6 +1641,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC
                if (surf == nullptr) {
                        fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n",
                                current_storage_frame);
+                       ++metric_quick_sync_stalled_frames;
                        storage_task_queue_changed.wait(lock, [this, &surf]{
                                if (storage_thread_should_quit)
                                        return true;
@@ -1634,8 +1654,13 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC
                surface_for_frame[current_storage_frame] = surf;
        }
 
-       *y_tex = surf->y_tex;
-       *cbcr_tex = surf->cbcr_tex;
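+       // In zero-copy mode, we hand our textures to the caller to render into;
+       // otherwise, we remember the caller's textures so end_frame() can read them back.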
+       if (use_zerocopy) {
+               *y_tex = surf->y_tex;
+               *cbcr_tex = surf->cbcr_tex;
+       } else {
+               surf->y_tex = *y_tex;
+               surf->cbcr_tex = *cbcr_tex;
+       }
 
        if (!global_flags.x264_video_to_disk) {
                VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
@@ -1727,6 +1752,9 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
                glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset));
                check_error();
 
+               // We don't own these; the caller does.
+               surf->y_tex = surf->cbcr_tex = 0;
+
                glBindTexture(GL_TEXTURE_2D, 0);
                check_error();
                glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -1784,6 +1812,7 @@ void QuickSyncEncoderImpl::shutdown()
 void QuickSyncEncoderImpl::close_file()
 {
        file_mux.reset();
+       metric_current_file_start_time_seconds = 0.0 / 0.0;
 }
 
 void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
@@ -1806,9 +1835,13 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
                video_extradata = x264_encoder->get_global_headers();
        }
 
+       current_file_mux_metrics.reset();
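+       // The per-file metrics restart from zero for each new output file, while
+       // total_mux_metrics keeps accumulating across files.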
+
        AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
        file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
-               std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+               std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+               { &current_file_mux_metrics, &total_mux_metrics }));
+       metric_current_file_start_time_seconds = get_timestamp_for_metrics();
 
        if (global_flags.x264_video_to_disk) {
                x264_encoder->add_mux(file_mux.get());
@@ -1956,7 +1989,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
        ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
        static int frameno = 0;
        print_latency("Current mixer latency (video inputs → ready for encode):",
-               received_ts, false, &frameno);
+               received_ts, false, &frameno, &mixer_latency_histogram);
 
        // Release back any input frames we needed to render this frame.
        frame.input_frames.clear();
@@ -2080,6 +2113,11 @@ void QuickSyncEncoder::add_audio(int64_t pts, vector<float> audio)
        impl->add_audio(pts, audio);
 }
 
+bool QuickSyncEncoder::is_zerocopy() const
+{
+       return impl->is_zerocopy();
+}
+
 bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
        return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);