git.sesse.net Git - nageru/blobdiff - decklink_output.cpp
Release Nageru 1.7.2.
[nageru] / decklink_output.cpp
index 55d29c5aad38c8d5be43f30ef677a2c9bb9ef032..a3d220b37c98bb96834415af5a277c47424b83cb 100644 (file)
@@ -2,6 +2,9 @@
 #include <movit/util.h>
 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
 #include <pthread.h>
+#include <unistd.h>
+
+#include <mutex>
 
 #include <epoxy/egl.h>
 
 #include "decklink_output.h"
 #include "decklink_util.h"
 #include "flags.h"
+#include "metrics.h"
 #include "print_latency.h"
-#include "resource_pool.h"
 #include "timebase.h"
+#include "v210_converter.h"
 
 using namespace movit;
 using namespace std;
 using namespace std::chrono;
 
+namespace {
+
+// This class can be deleted during regular use, so make all the metrics static.
+once_flag decklink_metrics_inited;
+LatencyHistogram latency_histogram;
+atomic<int64_t> metric_decklink_output_width_pixels{-1};
+atomic<int64_t> metric_decklink_output_height_pixels{-1};
+atomic<int64_t> metric_decklink_output_frame_rate_den{-1};
+atomic<int64_t> metric_decklink_output_frame_rate_nom{-1};
+atomic<int64_t> metric_decklink_output_inflight_frames{0};
+atomic<int64_t> metric_decklink_output_color_mismatch_frames{0};
+
+atomic<int64_t> metric_decklink_output_scheduled_frames_dropped{0};
+atomic<int64_t> metric_decklink_output_scheduled_frames_late{0};
+atomic<int64_t> metric_decklink_output_scheduled_frames_normal{0};
+atomic<int64_t> metric_decklink_output_scheduled_frames_preroll{0};
+
+atomic<int64_t> metric_decklink_output_completed_frames_completed{0};
+atomic<int64_t> metric_decklink_output_completed_frames_dropped{0};
+atomic<int64_t> metric_decklink_output_completed_frames_flushed{0};
+atomic<int64_t> metric_decklink_output_completed_frames_late{0};
+atomic<int64_t> metric_decklink_output_completed_frames_unknown{0};
+
+atomic<int64_t> metric_decklink_output_scheduled_samples{0};
+
+Summary metric_decklink_output_margin_seconds;
+
+}  // namespace
+
 DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index)
        : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
 {
        chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
+
+       call_once(decklink_metrics_inited, [](){
+               latency_histogram.init("decklink_output");
+               global_metrics.add("decklink_output_width_pixels", &metric_decklink_output_width_pixels, Metrics::TYPE_GAUGE);
+               global_metrics.add("decklink_output_height_pixels", &metric_decklink_output_height_pixels, Metrics::TYPE_GAUGE);
+               global_metrics.add("decklink_output_frame_rate_den", &metric_decklink_output_frame_rate_den, Metrics::TYPE_GAUGE);
+               global_metrics.add("decklink_output_frame_rate_nom", &metric_decklink_output_frame_rate_nom, Metrics::TYPE_GAUGE);
+               global_metrics.add("decklink_output_inflight_frames", &metric_decklink_output_inflight_frames, Metrics::TYPE_GAUGE);
+               global_metrics.add("decklink_output_color_mismatch_frames", &metric_decklink_output_color_mismatch_frames);
+
+               global_metrics.add("decklink_output_scheduled_frames", {{ "status", "dropped" }}, &metric_decklink_output_scheduled_frames_dropped);
+               global_metrics.add("decklink_output_scheduled_frames", {{ "status", "late" }}, &metric_decklink_output_scheduled_frames_late);
+               global_metrics.add("decklink_output_scheduled_frames", {{ "status", "normal" }}, &metric_decklink_output_scheduled_frames_normal);
+               global_metrics.add("decklink_output_scheduled_frames", {{ "status", "preroll" }}, &metric_decklink_output_scheduled_frames_preroll);
+
+               global_metrics.add("decklink_output_completed_frames", {{ "status", "completed" }}, &metric_decklink_output_completed_frames_completed);
+               global_metrics.add("decklink_output_completed_frames", {{ "status", "dropped" }}, &metric_decklink_output_completed_frames_dropped);
+               global_metrics.add("decklink_output_completed_frames", {{ "status", "flushed" }}, &metric_decklink_output_completed_frames_flushed);
+               global_metrics.add("decklink_output_completed_frames", {{ "status", "late" }}, &metric_decklink_output_completed_frames_late);
+               global_metrics.add("decklink_output_completed_frames", {{ "status", "unknown" }}, &metric_decklink_output_completed_frames_unknown);
+
+               global_metrics.add("decklink_output_scheduled_samples", &metric_decklink_output_scheduled_samples);
+               vector<double> quantiles{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99};
+               metric_decklink_output_margin_seconds.init(quantiles, 60.0);
+               global_metrics.add("decklink_output_margin_seconds", &metric_decklink_output_margin_seconds);
+       });
 }
 
-void DeckLinkOutput::set_device(IDeckLink *decklink)
+bool DeckLinkOutput::set_device(IDeckLink *decklink)
 {
        if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) {
-               fprintf(stderr, "Card %u has no outputs\n", card_index);
-               exit(1);
+               fprintf(stderr, "Warning: Card %u has no outputs\n", card_index);
+               return false;
        }
 
        IDeckLinkDisplayModeIterator *mode_it;
        if (output->GetDisplayModeIterator(&mode_it) != S_OK) {
-               fprintf(stderr, "Failed to enumerate output display modes for card %u\n", card_index);
-               exit(1);
+               fprintf(stderr, "Warning: Failed to enumerate output display modes for card %u\n", card_index);
+               return false;
        }
 
        video_modes.clear();
@@ -59,6 +118,7 @@ void DeckLinkOutput::set_device(IDeckLink *decklink)
        // if they exist. We're not very likely to need analog outputs, so we don't need a way
        // to change beyond that.
        video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index);
+       return true;
 }
 
 void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
@@ -66,7 +126,12 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
        assert(output);
        assert(!playback_initiated);
 
-       should_quit = false;
+       if (video_modes.empty()) {
+               fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height);
+               exit(1);
+       }
+
+       should_quit.unquit();
        playback_initiated = true;
        playback_started = false;
        this->base_pts = base_pts;
@@ -88,10 +153,16 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
                fprintf(stderr, "Failed to set PsF flag for card\n");
                exit(1);
        }
+       if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) {
+               // This affects at least some no-name SDI->HDMI converters.
+               // Warn, but don't die.
+               fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n");
+       }
 
        BMDDisplayModeSupport support;
        IDeckLinkDisplayMode *display_mode;
-       if (output->DoesSupportVideoMode(mode, bmdFormat8BitYUV, bmdVideoOutputFlagDefault,
+       BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV;
+       if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
                                         &support, &display_mode) != S_OK) {
                fprintf(stderr, "Couldn't ask for format support\n");
                exit(1);
@@ -102,14 +173,7 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
                exit(1);
        }
 
-       BMDDisplayModeFlags flags = display_mode->GetFlags();
-       if ((flags & bmdDisplayModeColorspaceRec601) && global_flags.ycbcr_rec709_coefficients) {
-               fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
-               fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
-       } else if ((flags & bmdDisplayModeColorspaceRec709) && !global_flags.ycbcr_rec709_coefficients) {
-               fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
-               fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
-       }
+       current_mode_flags = display_mode->GetFlags();
 
        BMDTimeValue time_value;
        BMDTimeScale time_scale;
@@ -118,6 +182,11 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
                exit(1);
        }
 
+       metric_decklink_output_width_pixels = width;
+       metric_decklink_output_height_pixels = height;
+       metric_decklink_output_frame_rate_nom = time_value;
+       metric_decklink_output_frame_rate_den = time_scale;
+
        frame_duration = time_value * TIMEBASE / time_scale;
 
        display_mode->Release();
@@ -160,7 +229,7 @@ void DeckLinkOutput::end_output()
                return;
        }
 
-       should_quit = true;
+       should_quit.quit();
        frame_queues_changed.notify_all();
        present_thread.join();
        playback_initiated = false;
@@ -179,12 +248,34 @@ void DeckLinkOutput::end_output()
        }
 }
 
-void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
+void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
 {
-       assert(!should_quit);
+       assert(!should_quit.should_quit());
 
-       unique_ptr<Frame> frame = move(get_frame());
-       chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+       if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) {
+               if (!last_frame_had_mode_mismatch) {
+                       fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
+                       fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
+               }
+               last_frame_had_mode_mismatch = true;
+               ++metric_decklink_output_color_mismatch_frames;
+       } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) {
+               if (!last_frame_had_mode_mismatch) {
+                       fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
+                       fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
+               }
+               last_frame_had_mode_mismatch = true;
+               ++metric_decklink_output_color_mismatch_frames;
+       } else {
+               last_frame_had_mode_mismatch = false;
+       }
+
+       unique_ptr<Frame> frame = get_frame();
+       if (global_flags.ten_bit_output) {
+               chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+       } else {
+               chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+       }
 
        // Download the UYVY texture to the PBO.
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
@@ -193,10 +284,17 @@ void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, const vector<RefC
        glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
        check_error();
 
-       glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
-       check_error();
-       glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
-       check_error();
+       if (global_flags.ten_bit_output) {
+               glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+               check_error();
+               glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
+               check_error();
+       } else {
+               glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+               check_error();
+               glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
+               check_error();
+       }
 
        glBindTexture(GL_TEXTURE_2D, 0);
        check_error();
@@ -240,11 +338,12 @@ void DeckLinkOutput::send_audio(int64_t pts, const std::vector<float> &samples)
                        fprintf(stderr, "ScheduleAudioSamples() returned short write (%u/%ld)\n", frames_written, samples.size() / 2);
                }
        }
+       metric_decklink_output_scheduled_samples += samples.size() / 2;
 }
 
 void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp)
 {
-       assert(!should_quit);
+       assert(!should_quit.should_quit());
 
        *dropped_frames = 0;
        *frame_duration = this->frame_duration;
@@ -256,6 +355,7 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f
        // While prerolling, we send out frames as quickly as we can.
        if (target_time < base_pts) {
                *is_preroll = true;
+               ++metric_decklink_output_scheduled_frames_preroll;
                return;
        }
 
@@ -280,9 +380,13 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f
        *frame_timestamp = steady_clock::now() +
                nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE);
 
+       metric_decklink_output_margin_seconds.count_event(
+               (target_time - stream_frame_time) / double(TIMEBASE));
+
        // If we're ahead of time, wait for the frame to (approximately) start.
        if (stream_frame_time < target_time) {
-               this_thread::sleep_until(*frame_timestamp);
+               should_quit.sleep_until(*frame_timestamp);
+               ++metric_decklink_output_scheduled_frames_normal;
                return;
        }
 
@@ -291,6 +395,7 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f
        if (stream_frame_time < target_time + max_overshoot) {
                fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n",
                        lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE));
+               ++metric_decklink_output_scheduled_frames_late;
                return;
        }
 
@@ -300,6 +405,8 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f
        const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE;
        *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame);
        fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames);
+       metric_decklink_output_scheduled_frames_dropped += *dropped_frames;
+       ++metric_decklink_output_scheduled_frames_normal;
 }
 
 uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
@@ -329,35 +436,52 @@ uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
        return best_mode;
 }
 
+YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const
+{
+       if (current_mode_flags & bmdDisplayModeColorspaceRec601) {
+               return YCBCR_REC_601;
+       } else {
+               // Don't bother checking bmdDisplayModeColorspaceRec709;
+               // if none is set, 709 is a good default anyway.
+               return YCBCR_REC_709;
+       }
+}
+
 HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result)
 {
        Frame *frame = static_cast<Frame *>(completedFrame);
        switch (result) {
        case bmdOutputFrameCompleted:
+               ++metric_decklink_output_completed_frames_completed;
                break;
        case bmdOutputFrameDisplayedLate:
                fprintf(stderr, "Output frame displayed late (pts=%ld)\n", frame->pts);
                fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
+               ++metric_decklink_output_completed_frames_late;
                break;
        case bmdOutputFrameDropped:
                fprintf(stderr, "Output frame was dropped (pts=%ld)\n", frame->pts);
                fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
+               ++metric_decklink_output_completed_frames_dropped;
                break;
        case bmdOutputFrameFlushed:
                fprintf(stderr, "Output frame was flushed (pts=%ld)\n", frame->pts);
+               ++metric_decklink_output_completed_frames_flushed;
                break;
        default:
                fprintf(stderr, "Output frame completed with unknown status %d\n", result);
+               ++metric_decklink_output_completed_frames_unknown;
                break;
        }
 
-       static int hei = 0;
-       print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &hei);
+       static int frameno = 0;
+       print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram);
 
        {
                lock_guard<mutex> lock(frame_queue_mutex);
                frame_freelist.push(unique_ptr<Frame>(frame));
                --num_frames_in_flight;
+               --metric_decklink_output_inflight_frames;
        }
 
        return S_OK;
@@ -381,17 +505,31 @@ unique_ptr<DeckLinkOutput::Frame> DeckLinkOutput::get_frame()
 
        unique_ptr<Frame> frame(new Frame);
 
-       frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
+       size_t stride;
+       if (global_flags.ten_bit_output) {
+               stride = v210Converter::get_v210_stride(width);
+               GLint v210_width = stride / sizeof(uint32_t);
+               frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
+
+               // We need valid texture state, or NVIDIA won't allow us to write to the texture.
+               glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+               check_error();
+               glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+               check_error();
+       } else {
+               stride = width * 2;
+               frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
+       }
 
        glGenBuffers(1, &frame->pbo);
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
        check_error();
-       glBufferStorage(GL_PIXEL_PACK_BUFFER, width * height * 2, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        check_error();
-       frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        check_error();
-       frame->uyvy_ptr_local.reset(new uint8_t[width * height * 2]);
+       frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
        frame->resource_pool = resource_pool;
 
        return frame;
@@ -405,21 +543,34 @@ void DeckLinkOutput::present_thread_func()
                {
                         unique_lock<mutex> lock(frame_queue_mutex);
                         frame_queues_changed.wait(lock, [this]{
-                                return should_quit || !pending_video_frames.empty();
+                                return should_quit.should_quit() || !pending_video_frames.empty();
                         });
-                        if (should_quit) {
+                        if (should_quit.should_quit()) {
                                return;
                        }
                        frame = move(pending_video_frames.front());
                        pending_video_frames.pop();
                        ++num_frames_in_flight;
+                       ++metric_decklink_output_inflight_frames;
                }
 
-               glWaitSync(frame->fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+               for ( ;; ) {
+                       int err = glClientWaitSync(frame->fence.get(), /*flags=*/0, 0);
+                       if (err == GL_TIMEOUT_EXPIRED) {
+                               // NVIDIA likes to busy-wait; yield instead.
+                               this_thread::sleep_for(milliseconds(1));
+                       } else {
+                               break;
+                       }
+               }
                check_error();
                frame->fence.reset();
 
-               memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
+               if (global_flags.ten_bit_output) {
+                       memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
+               } else {
+                       memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
+               }
 
                // Release any input frames we needed to render this frame.
                frame->input_frames.clear();
@@ -435,6 +586,7 @@ void DeckLinkOutput::present_thread_func()
                        lock_guard<mutex> lock(frame_queue_mutex);
                        frame_freelist.push(move(frame));
                        --num_frames_in_flight;
+                       --metric_decklink_output_inflight_frames;
                }
        }
 }
@@ -501,12 +653,20 @@ long DeckLinkOutput::Frame::GetHeight()
 
 long DeckLinkOutput::Frame::GetRowBytes()
 {
-       return global_flags.width * 2;
+       if (global_flags.ten_bit_output) {
+               return v210Converter::get_v210_stride(global_flags.width);
+       } else {
+               return global_flags.width * 2;
+       }
 }
 
 BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
 {
-       return bmdFormat8BitYUV;
+       if (global_flags.ten_bit_output) {
+               return bmdFormat10BitYUV;
+       } else {
+               return bmdFormat8BitYUV;
+       }
 }
 
 BMDFrameFlags DeckLinkOutput::Frame::GetFlags()