git.sesse.net Git - nageru/blobdiff - quicksync_encoder.cpp
Move audio encoding over to its own mutex, again reducing mutex contention.
[nageru] / quicksync_encoder.cpp
index 0c9cf0640446f39df3dcdfca27f0d16f584a50d2..f7e1696ab9f13419f182ed03a556e2e913c2c1b1 100644 (file)
@@ -68,7 +68,7 @@ namespace {
 
 // These need to survive several QuickSyncEncoderImpl instances,
 // so they are outside.
-bool quick_sync_metrics_inited = false;
+once_flag quick_sync_metrics_inited;
 LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
 MuxMetrics current_file_mux_metrics, total_mux_metrics;
 std::atomic<double> metric_current_file_start_time_seconds{0.0 / 0.0};
@@ -1573,15 +1573,14 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
                memset(&slice_param, 0, sizeof(slice_param));
        }
 
-       if (!quick_sync_metrics_inited) {
+       call_once(quick_sync_metrics_inited, [](){
                mixer_latency_histogram.init("mixer");
                qs_latency_histogram.init("quick_sync");
                current_file_mux_metrics.init({{ "destination", "current_file" }});
                total_mux_metrics.init({{ "destination", "files_total" }});
                global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE);
                global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames);
-               quick_sync_metrics_inited = true;
-       }
+       });
 
        storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
@@ -1720,6 +1719,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC
 
 void QuickSyncEncoderImpl::add_audio(int64_t pts, vector<float> audio)
 {
+       lock_guard<mutex> lock(file_audio_encoder_mutex);
        assert(!is_shutdown);
        file_audio_encoder->encode_audio(audio, pts + global_delay());
 }
@@ -1801,7 +1801,10 @@ void QuickSyncEncoderImpl::shutdown()
        storage_thread.join();
 
        // Encode any leftover audio in the queues, and also any delayed frames.
-       file_audio_encoder->encode_last_audio();
+       {
+               lock_guard<mutex> lock(file_audio_encoder_mutex);
+               file_audio_encoder->encode_last_audio();
+       }
 
        if (!global_flags.x264_video_to_disk) {
                release_encode();
@@ -1838,10 +1841,14 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 
        current_file_mux_metrics.reset();
 
-       AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
-       file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
-               std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
-               { &current_file_mux_metrics, &total_mux_metrics }));
+       {
+               lock_guard<mutex> lock(file_audio_encoder_mutex);
+               AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
+               file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+                       std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+                       Mux::WRITE_BACKGROUND,
+                       { &current_file_mux_metrics, &total_mux_metrics }));
+       }
        metric_current_file_start_time_seconds = get_timestamp_for_metrics();
 
        if (global_flags.x264_video_to_disk) {
@@ -1982,8 +1989,12 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
        // Wait for the GPU to be done with the frame.
        GLenum sync_status;
        do {
-               sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+               sync_status = glClientWaitSync(frame.fence.get(), 0, 0);
                check_error();
+               if (sync_status == GL_TIMEOUT_EXPIRED) {
+                       // NVIDIA likes to busy-wait; yield instead.
+                       this_thread::sleep_for(milliseconds(1));
+               }
        } while (sync_status == GL_TIMEOUT_EXPIRED);
        assert(sync_status != GL_WAIT_FAILED);