X-Git-Url: https://git.sesse.net/?p=nageru;a=blobdiff_plain;f=decklink_output.cpp;h=a3d220b37c98bb96834415af5a277c47424b83cb;hp=55d29c5aad38c8d5be43f30ef677a2c9bb9ef032;hb=703e00da89118df9be0354dda621bed023e6030e;hpb=9aabf53d2ee424e66e3d51039a7a8ecce25f3516 diff --git a/decklink_output.cpp b/decklink_output.cpp index 55d29c5..a3d220b 100644 --- a/decklink_output.cpp +++ b/decklink_output.cpp @@ -2,6 +2,9 @@ #include #include // Must be above the Xlib includes. #include +#include + +#include #include @@ -9,31 +12,87 @@ #include "decklink_output.h" #include "decklink_util.h" #include "flags.h" +#include "metrics.h" #include "print_latency.h" -#include "resource_pool.h" #include "timebase.h" +#include "v210_converter.h" using namespace movit; using namespace std; using namespace std::chrono; +namespace { + +// This class can be deleted during regular use, so make all the metrics static. +once_flag decklink_metrics_inited; +LatencyHistogram latency_histogram; +atomic metric_decklink_output_width_pixels{-1}; +atomic metric_decklink_output_height_pixels{-1}; +atomic metric_decklink_output_frame_rate_den{-1}; +atomic metric_decklink_output_frame_rate_nom{-1}; +atomic metric_decklink_output_inflight_frames{0}; +atomic metric_decklink_output_color_mismatch_frames{0}; + +atomic metric_decklink_output_scheduled_frames_dropped{0}; +atomic metric_decklink_output_scheduled_frames_late{0}; +atomic metric_decklink_output_scheduled_frames_normal{0}; +atomic metric_decklink_output_scheduled_frames_preroll{0}; + +atomic metric_decklink_output_completed_frames_completed{0}; +atomic metric_decklink_output_completed_frames_dropped{0}; +atomic metric_decklink_output_completed_frames_flushed{0}; +atomic metric_decklink_output_completed_frames_late{0}; +atomic metric_decklink_output_completed_frames_unknown{0}; + +atomic metric_decklink_output_scheduled_samples{0}; + +Summary metric_decklink_output_margin_seconds; + +} // namespace + DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index) : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index) { chroma_subsampler.reset(new ChromaSubsampler(resource_pool)); + + call_once(decklink_metrics_inited, [](){ + latency_histogram.init("decklink_output"); + global_metrics.add("decklink_output_width_pixels", &metric_decklink_output_width_pixels, Metrics::TYPE_GAUGE); + global_metrics.add("decklink_output_height_pixels", &metric_decklink_output_height_pixels, Metrics::TYPE_GAUGE); + global_metrics.add("decklink_output_frame_rate_den", &metric_decklink_output_frame_rate_den, Metrics::TYPE_GAUGE); + global_metrics.add("decklink_output_frame_rate_nom", &metric_decklink_output_frame_rate_nom, Metrics::TYPE_GAUGE); + global_metrics.add("decklink_output_inflight_frames", &metric_decklink_output_inflight_frames, Metrics::TYPE_GAUGE); + global_metrics.add("decklink_output_color_mismatch_frames", &metric_decklink_output_color_mismatch_frames); + + global_metrics.add("decklink_output_scheduled_frames", {{ "status", "dropped" }}, &metric_decklink_output_scheduled_frames_dropped); + global_metrics.add("decklink_output_scheduled_frames", {{ "status", "late" }}, &metric_decklink_output_scheduled_frames_late); + global_metrics.add("decklink_output_scheduled_frames", {{ "status", "normal" }}, &metric_decklink_output_scheduled_frames_normal); + global_metrics.add("decklink_output_scheduled_frames", {{ "status", "preroll" }}, &metric_decklink_output_scheduled_frames_preroll); + + global_metrics.add("decklink_output_completed_frames", {{ "status", "completed" }}, &metric_decklink_output_completed_frames_completed); + global_metrics.add("decklink_output_completed_frames", {{ "status", "dropped" }}, &metric_decklink_output_completed_frames_dropped); + global_metrics.add("decklink_output_completed_frames", {{ "status", "flushed" }}, &metric_decklink_output_completed_frames_flushed); + global_metrics.add("decklink_output_completed_frames", {{ "status", "late" }}, &metric_decklink_output_completed_frames_late); + global_metrics.add("decklink_output_completed_frames", {{ "status", "unknown" }}, &metric_decklink_output_completed_frames_unknown); + + global_metrics.add("decklink_output_scheduled_samples", &metric_decklink_output_scheduled_samples); + vector quantiles{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99}; + metric_decklink_output_margin_seconds.init(quantiles, 60.0); + global_metrics.add("decklink_output_margin_seconds", &metric_decklink_output_margin_seconds); + }); } -void DeckLinkOutput::set_device(IDeckLink *decklink) +bool DeckLinkOutput::set_device(IDeckLink *decklink) { if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) { - fprintf(stderr, "Card %u has no outputs\n", card_index); - exit(1); + fprintf(stderr, "Warning: Card %u has no outputs\n", card_index); + return false; } IDeckLinkDisplayModeIterator *mode_it; if (output->GetDisplayModeIterator(&mode_it) != S_OK) { - fprintf(stderr, "Failed to enumerate output display modes for card %u\n", card_index); - exit(1); + fprintf(stderr, "Warning: Failed to enumerate output display modes for card %u\n", card_index); + return false; } video_modes.clear(); @@ -59,6 +118,7 @@ void DeckLinkOutput::set_device(IDeckLink *decklink) // if they exist. We're not very likely to need analog outputs, so we don't need a way // to change beyond that. video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index); + return true; } void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) @@ -66,7 +126,12 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) assert(output); assert(!playback_initiated); - should_quit = false; + if (video_modes.empty()) { + fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height); + exit(1); + } + + should_quit.unquit(); playback_initiated = true; playback_started = false; this->base_pts = base_pts; @@ -88,10 +153,16 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) fprintf(stderr, "Failed to set PsF flag for card\n"); exit(1); } + if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) { + // This affects at least some no-name SDI->HDMI converters. + // Warn, but don't die. + fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n"); + } BMDDisplayModeSupport support; IDeckLinkDisplayMode *display_mode; - if (output->DoesSupportVideoMode(mode, bmdFormat8BitYUV, bmdVideoOutputFlagDefault, + BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV; + if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault, &support, &display_mode) != S_OK) { fprintf(stderr, "Couldn't ask for format support\n"); exit(1); @@ -102,14 +173,7 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) exit(1); } - BMDDisplayModeFlags flags = display_mode->GetFlags(); - if ((flags & bmdDisplayModeColorspaceRec601) && global_flags.ycbcr_rec709_coefficients) { - fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n"); - fprintf(stderr, " Consider --output-ycbcr-coefficients=rec601 (or =auto).\n"); - } else if ((flags & bmdDisplayModeColorspaceRec709) && !global_flags.ycbcr_rec709_coefficients) { - fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n"); - fprintf(stderr, " Consider --output-ycbcr-coefficients=rec709 (or =auto).\n"); - } + current_mode_flags = display_mode->GetFlags(); BMDTimeValue time_value; BMDTimeScale time_scale; @@ -118,6 +182,11 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) exit(1); } + metric_decklink_output_width_pixels = width; + metric_decklink_output_height_pixels = height; + metric_decklink_output_frame_rate_nom = time_value; + metric_decklink_output_frame_rate_den = time_scale; + frame_duration = time_value * TIMEBASE / time_scale; display_mode->Release(); @@ -160,7 +229,7 @@ void DeckLinkOutput::end_output() return; } - should_quit = true; + should_quit.quit(); frame_queues_changed.notify_all(); present_thread.join(); playback_initiated = false; @@ -179,12 +248,34 @@ void DeckLinkOutput::end_output() } } -void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, const vector &input_frames, int64_t pts, int64_t duration) +void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector &input_frames, int64_t pts, int64_t duration) { - assert(!should_quit); + assert(!should_quit.should_quit()); - unique_ptr frame = move(get_frame()); - chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex); + if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) { + if (!last_frame_had_mode_mismatch) { + fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n"); + fprintf(stderr, " Consider --output-ycbcr-coefficients=rec601 (or =auto).\n"); + } + last_frame_had_mode_mismatch = true; + ++metric_decklink_output_color_mismatch_frames; + } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) { + if (!last_frame_had_mode_mismatch) { + fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n"); + fprintf(stderr, " Consider --output-ycbcr-coefficients=rec709 (or =auto).\n"); + } + last_frame_had_mode_mismatch = true; + ++metric_decklink_output_color_mismatch_frames; + } else { + last_frame_had_mode_mismatch = false; + } + + unique_ptr frame = get_frame(); + if (global_flags.ten_bit_output) { + chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex); + } else { + chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex); + } // Download the UYVY texture to the PBO. glPixelStorei(GL_PACK_ROW_LENGTH, 0); @@ -193,10 +284,17 @@ void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, const vectorpbo); check_error(); - glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex); - check_error(); - glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0)); - check_error(); + if (global_flags.ten_bit_output) { + glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex); + check_error(); + glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0)); + check_error(); + } else { + glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex); + check_error(); + glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0)); + check_error(); + } glBindTexture(GL_TEXTURE_2D, 0); check_error(); @@ -240,11 +338,12 @@ void DeckLinkOutput::send_audio(int64_t pts, const std::vector &samples) fprintf(stderr, "ScheduleAudioSamples() returned short write (%u/%ld)\n", frames_written, samples.size() / 2); } } + metric_decklink_output_scheduled_samples += samples.size() / 2; } void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp) { - assert(!should_quit); + assert(!should_quit.should_quit()); *dropped_frames = 0; *frame_duration = this->frame_duration; @@ -256,6 +355,7 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f // While prerolling, we send out frames as quickly as we can. if (target_time < base_pts) { *is_preroll = true; + ++metric_decklink_output_scheduled_frames_preroll; return; } @@ -280,9 +380,13 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f *frame_timestamp = steady_clock::now() + nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE); + metric_decklink_output_margin_seconds.count_event( + (target_time - stream_frame_time) / double(TIMEBASE)); + // If we're ahead of time, wait for the frame to (approximately) start. if (stream_frame_time < target_time) { - this_thread::sleep_until(*frame_timestamp); + should_quit.sleep_until(*frame_timestamp); + ++metric_decklink_output_scheduled_frames_normal; return; } @@ -291,6 +395,7 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f if (stream_frame_time < target_time + max_overshoot) { fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n", lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE)); + ++metric_decklink_output_scheduled_frames_late; return; } @@ -300,6 +405,8 @@ void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *f const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE; *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame); fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames); + metric_decklink_output_scheduled_frames_dropped += *dropped_frames; + ++metric_decklink_output_scheduled_frames_normal; } uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const @@ -329,35 +436,52 @@ uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const return best_mode; } +YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const +{ + if (current_mode_flags & bmdDisplayModeColorspaceRec601) { + return YCBCR_REC_601; + } else { + // Don't bother checking bmdDisplayModeColorspaceRec709; + // if none is set, 709 is a good default anyway. + return YCBCR_REC_709; + } +} + HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result) { Frame *frame = static_cast(completedFrame); switch (result) { case bmdOutputFrameCompleted: + ++metric_decklink_output_completed_frames_completed; break; case bmdOutputFrameDisplayedLate: fprintf(stderr, "Output frame displayed late (pts=%ld)\n", frame->pts); fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n"); + ++metric_decklink_output_completed_frames_late; break; case bmdOutputFrameDropped: fprintf(stderr, "Output frame was dropped (pts=%ld)\n", frame->pts); fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n"); + ++metric_decklink_output_completed_frames_dropped; break; case bmdOutputFrameFlushed: fprintf(stderr, "Output frame was flushed (pts=%ld)\n", frame->pts); + ++metric_decklink_output_completed_frames_flushed; break; default: fprintf(stderr, "Output frame completed with unknown status %d\n", result); + ++metric_decklink_output_completed_frames_unknown; break; } - static int hei = 0; - print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &hei); + static int frameno = 0; + print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram); { lock_guard lock(frame_queue_mutex); frame_freelist.push(unique_ptr(frame)); --num_frames_in_flight; + --metric_decklink_output_inflight_frames; } return S_OK; @@ -381,17 +505,31 @@ unique_ptr DeckLinkOutput::get_frame() unique_ptr frame(new Frame); - frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height); + size_t stride; + if (global_flags.ten_bit_output) { + stride = v210Converter::get_v210_stride(width); + GLint v210_width = stride / sizeof(uint32_t); + frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height); + + // We need valid texture state, or NVIDIA won't allow us to write to the texture. + glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + } else { + stride = width * 2; + frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height); + } glGenBuffers(1, &frame->pbo); check_error(); glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo); check_error(); - glBufferStorage(GL_PIXEL_PACK_BUFFER, width * height * 2, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); check_error(); - frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); check_error(); - frame->uyvy_ptr_local.reset(new uint8_t[width * height * 2]); + frame->uyvy_ptr_local.reset(new uint8_t[stride * height]); frame->resource_pool = resource_pool; return frame; @@ -405,21 +543,34 @@ void DeckLinkOutput::present_thread_func() { unique_lock lock(frame_queue_mutex); frame_queues_changed.wait(lock, [this]{ - return should_quit || !pending_video_frames.empty(); + return should_quit.should_quit() || !pending_video_frames.empty(); }); - if (should_quit) { + if (should_quit.should_quit()) { return; } frame = move(pending_video_frames.front()); pending_video_frames.pop(); ++num_frames_in_flight; + ++metric_decklink_output_inflight_frames; } - glWaitSync(frame->fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED); + for ( ;; ) { + int err = glClientWaitSync(frame->fence.get(), /*flags=*/0, 0); + if (err == GL_TIMEOUT_EXPIRED) { + // NVIDIA likes to busy-wait; yield instead. + this_thread::sleep_for(milliseconds(1)); + } else { + break; + } + } check_error(); frame->fence.reset(); - memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2); + if (global_flags.ten_bit_output) { + memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height); + } else { + memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2); + } // Release any input frames we needed to render this frame. frame->input_frames.clear(); @@ -435,6 +586,7 @@ void DeckLinkOutput::present_thread_func() lock_guard lock(frame_queue_mutex); frame_freelist.push(move(frame)); --num_frames_in_flight; + --metric_decklink_output_inflight_frames; } } } @@ -501,12 +653,20 @@ long DeckLinkOutput::Frame::GetHeight() long DeckLinkOutput::Frame::GetRowBytes() { - return global_flags.width * 2; + if (global_flags.ten_bit_output) { + return v210Converter::get_v210_stride(global_flags.width); + } else { + return global_flags.width * 2; + } } BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat() { - return bmdFormat8BitYUV; + if (global_flags.ten_bit_output) { + return bmdFormat10BitYUV; + } else { + return bmdFormat8BitYUV; + } } BMDFrameFlags DeckLinkOutput::Frame::GetFlags()