X-Git-Url: https://git.sesse.net/?p=nageru;a=blobdiff_plain;f=mixer.h;h=d1bf79519452113a49183e7539596f6c96600010;hp=f50eb58db5560f620b2fcdb1cc9097f4b6c5d1cb;hb=703e00da89118df9be0354dda621bed023e6030e;hpb=65d716be70e6295628dfa5bb0a72f3429b9696ba diff --git a/mixer.h b/mixer.h index f50eb58..d1bf795 100644 --- a/mixer.h +++ b/mixer.h @@ -3,53 +3,155 @@ // The actual video mixer, running in its own separate background thread. +#include #include + #undef Success + #include #include - -#include -#include -#include #include +#include #include #include #include +#include #include #include +#include #include #include #include +#include + +#include "audio_mixer.h" #include "bmusb/bmusb.h" -#include "alsa_output.h" -#include "ebu_r128_proc.h" -#include "h264encode.h" +#include "defs.h" #include "httpd.h" +#include "input_state.h" +#include "libusb.h" #include "pbo_frame_allocator.h" #include "ref_counted_frame.h" #include "ref_counted_gl_sync.h" -#include "resampling_queue.h" #include "theme.h" #include "timebase.h" -#include "stereocompressor.h" -#include "filter.h" -#include "input_state.h" +#include "video_encoder.h" +#include "ycbcr_interpretation.h" -class H264Encoder; +class ALSAOutput; +class ChromaSubsampler; +class DeckLinkOutput; class QSurface; +class QSurfaceFormat; +class TimecodeRenderer; +class v210Converter; + namespace movit { class Effect; class EffectChain; -class FlatInput; class ResourcePool; +class YCbCrInput; } // namespace movit -namespace movit { -class YCbCrInput; -} -class QOpenGLContext; -class QSurfaceFormat; +// A class to estimate the future jitter. Used in QueueLengthPolicy (see below). +// +// There are many ways to estimate jitter; I've tested a few ones (and also +// some algorithms that don't explicitly model jitter) with different +// parameters on some real-life data in experiments/queue_drop_policy.cpp. 
+// This is one based on simple order statistics where I've added some margin in +// the number of starvation events; I believe that about one every hour would +// probably be acceptable, but this one typically goes lower than that, at the +// cost of 2–3 ms extra latency. (If the queue is hard-limited to one frame, it's +// possible to get ~10 ms further down, but this would mean framedrops every +// second or so.) The general strategy is: Take the 99.9-percentile jitter over +// last 5000 frames, multiply by two, and that's our worst-case jitter +// estimate. The fact that we're not using the max value means that we could +// actually even throw away very late frames immediately, which means we only +// get one user-visible event instead of seeing something both when the frame +// arrives late (duplicate frame) and then again when we drop. +class JitterHistory { +private: + static constexpr size_t history_length = 5000; + static constexpr double percentile = 0.999; + static constexpr double multiplier = 2.0; + +public: + void register_metrics(const std::vector> &labels); + void unregister_metrics(const std::vector> &labels); + + void clear() { + history.clear(); + orders.clear(); + } + void frame_arrived(std::chrono::steady_clock::time_point now, int64_t frame_duration, size_t dropped_frames); + std::chrono::steady_clock::time_point get_expected_next_frame() const { return expected_timestamp; } + double estimate_max_jitter() const; + +private: + // A simple O(k) based algorithm for getting the k-th largest or + // smallest element from our window; we simply keep the multiset + // ordered (insertions and deletions are O(n) as always) and then + // iterate from one of the sides. If we had larger values of k, + // we could go for a more complicated setup with two sets or heaps + // (one increasing and one decreasing) that we keep balanced around + // the point, or it is possible to reimplement std::set with + // counts in each node. 
However, since k=5, we don't need this. + std::multiset orders; + std::deque::iterator> history; + + std::chrono::steady_clock::time_point expected_timestamp = std::chrono::steady_clock::time_point::min(); + + // Metrics. There are no direct summaries for jitter, since we already have latency summaries. + std::atomic metric_input_underestimated_jitter_frames{0}; + std::atomic metric_input_estimated_max_jitter_seconds{0.0 / 0.0}; +}; + +// For any card that's not the master (where we pick out the frames as they +// come, as fast as we can process), there's going to be a queue. The question +// is when we should drop frames from that queue (apart from the obvious +// dropping if the 16-frame queue should become full), especially given that +// the frame rate could be lower or higher than the master (either subtly or +// dramatically). We have two (conflicting) demands: +// +// 1. We want to avoid starving the queue. +// 2. We don't want to add more delay than is needed. +// +// Our general strategy is to drop as many frames as we can (helping for #2) +// that we think is safe for #1 given jitter. To this end, we measure the +// deviation from the expected arrival time for all cards, and use that for +// continuous jitter estimation. +// +// We then drop everything from the queue that we're sure we won't need to +// serve the output in the time before the next frame arrives. Typically, +// this means the queue will contain 0 or 1 frames, although more is also +// possible if the jitter is very high. +class QueueLengthPolicy { +public: + QueueLengthPolicy() {} + void reset(unsigned card_index) { + this->card_index = card_index; + } + + void register_metrics(const std::vector> &labels); + void unregister_metrics(const std::vector> &labels); + + // Call after picking out a frame, so 0 means starvation. 
+ void update_policy(std::chrono::steady_clock::time_point now, + std::chrono::steady_clock::time_point expected_next_frame, + int64_t input_frame_duration, + int64_t master_frame_duration, + double max_input_card_jitter_seconds, + double max_master_card_jitter_seconds); + unsigned get_safe_queue_length() const { return safe_queue_length; } + +private: + unsigned card_index; // For debugging and metrics only. + unsigned safe_queue_length = 0; // Can never go below zero. + + // Metrics. + std::atomic metric_input_queue_safe_length_frames{1}; +}; class Mixer { public: @@ -95,18 +197,36 @@ public: return output_channel[output].get_display_frame(frame); } + // NOTE: Callbacks will be called with a mutex held, so you should probably + // not do real work in them. typedef std::function new_frame_ready_callback_t; - void set_frame_ready_callback(Output output, new_frame_ready_callback_t callback) + void add_frame_ready_callback(Output output, void *key, new_frame_ready_callback_t callback) + { + output_channel[output].add_frame_ready_callback(key, callback); + } + + void remove_frame_ready_callback(Output output, void *key) + { + output_channel[output].remove_frame_ready_callback(key); + } + + // TODO: Should this really be per-channel? Shouldn't it just be called for e.g. the live output? 
+ typedef std::function &)> transition_names_updated_callback_t; + void set_transition_names_updated_callback(Output output, transition_names_updated_callback_t callback) + { + output_channel[output].set_transition_names_updated_callback(callback); + } + + typedef std::function name_updated_callback_t; + void set_name_updated_callback(Output output, name_updated_callback_t callback) { - output_channel[output].set_frame_ready_callback(callback); + output_channel[output].set_name_updated_callback(callback); } - typedef std::function audio_level_callback_t; - void set_audio_level_callback(audio_level_callback_t callback) + typedef std::function color_updated_callback_t; + void set_color_updated_callback(Output output, color_updated_callback_t callback) { - audio_level_callback = callback; + output_channel[output].set_color_updated_callback(callback); } std::vector get_transition_names() @@ -124,146 +244,368 @@ public: return theme->get_channel_name(channel); } - bool get_supports_set_wb(unsigned channel) const + std::string get_channel_color(unsigned channel) const { - return theme->get_supports_set_wb(channel); + return theme->get_channel_color(channel); } - void set_wb(unsigned channel, double r, double g, double b) const + int get_channel_signal(unsigned channel) const { - theme->set_wb(channel, r, g, b); + return theme->get_channel_signal(channel); } - void set_locut_cutoff(float cutoff_hz) + int map_signal(unsigned channel) { - locut_cutoff_hz = cutoff_hz; + return theme->map_signal(channel); } - float get_limiter_threshold_dbfs() + unsigned get_master_clock() const { - return limiter_threshold_dbfs; + return master_clock_channel; } - float get_compressor_threshold_dbfs() + void set_master_clock(unsigned channel) { - return compressor_threshold_dbfs; + master_clock_channel = channel; } - void set_limiter_threshold_dbfs(float threshold_dbfs) + void set_signal_mapping(int signal, int card) { - limiter_threshold_dbfs = threshold_dbfs; + return 
theme->set_signal_mapping(signal, card); } - void set_compressor_threshold_dbfs(float threshold_dbfs) + YCbCrInterpretation get_input_ycbcr_interpretation(unsigned card_index) const; + void set_input_ycbcr_interpretation(unsigned card_index, const YCbCrInterpretation &interpretation); + + bool get_supports_set_wb(unsigned channel) const { - compressor_threshold_dbfs = threshold_dbfs; + return theme->get_supports_set_wb(channel); } - void set_limiter_enabled(bool enabled) + void set_wb(unsigned channel, double r, double g, double b) const { - limiter_enabled = enabled; + theme->set_wb(channel, r, g, b); } - void set_compressor_enabled(bool enabled) + // Note: You can also get this through the global variable global_audio_mixer. + AudioMixer *get_audio_mixer() { return audio_mixer.get(); } + const AudioMixer *get_audio_mixer() const { return audio_mixer.get(); } + + void schedule_cut() { - compressor_enabled = enabled; + should_cut = true; } - void set_gain_staging_db(float gain_db) - { - std::unique_lock lock(level_compressor_mutex); - level_compressor_enabled = false; - gain_staging_db = gain_db; + unsigned get_num_cards() const { return num_cards; } + + std::string get_card_description(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_description(); } - void set_gain_staging_auto(bool enabled) - { - std::unique_lock lock(level_compressor_mutex); - level_compressor_enabled = enabled; + // The difference between this and the previous function is that if a card + // is used as the current output, get_card_description() will return the + // fake card that's replacing it for input, whereas this function will return + // the card's actual name. 
+ std::string get_output_card_description(unsigned card_index) const { + assert(card_can_be_used_as_output(card_index)); + assert(card_index < num_cards); + if (cards[card_index].parked_capture) { + return cards[card_index].parked_capture->get_description(); + } else { + return cards[card_index].capture->get_description(); + } } - void schedule_cut() + bool card_can_be_used_as_output(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].output != nullptr; + } + + bool card_is_ffmpeg(unsigned card_index) const { + assert(card_index < num_cards + num_video_inputs); + return cards[card_index].type == CardType::FFMPEG_INPUT; + } + + std::map get_available_video_modes(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_available_video_modes(); + } + + uint32_t get_current_video_mode(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_current_video_mode(); + } + + void set_video_mode(unsigned card_index, uint32_t mode) { + assert(card_index < num_cards); + cards[card_index].capture->set_video_mode(mode); + } + + void start_mode_scanning(unsigned card_index); + + std::map get_available_video_inputs(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_available_video_inputs(); + } + + uint32_t get_current_video_input(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_current_video_input(); + } + + void set_video_input(unsigned card_index, uint32_t input) { + assert(card_index < num_cards); + cards[card_index].capture->set_video_input(input); + } + + std::map get_available_audio_inputs(unsigned card_index) const { + assert(card_index < num_cards); + return cards[card_index].capture->get_available_audio_inputs(); + } + + uint32_t get_current_audio_input(unsigned card_index) const { + assert(card_index < num_cards); + return 
cards[card_index].capture->get_current_audio_input(); + } + + void set_audio_input(unsigned card_index, uint32_t input) { + assert(card_index < num_cards); + cards[card_index].capture->set_audio_input(input); + } + + std::string get_ffmpeg_filename(unsigned card_index) const; + + void set_ffmpeg_filename(unsigned card_index, const std::string &filename); + + void change_x264_bitrate(unsigned rate_kbit) { + video_encoder->change_x264_bitrate(rate_kbit); + } + + int get_output_card_index() const { // -1 = no output, just stream. + return desired_output_card_index; + } + + void set_output_card(int card_index) { // -1 = no output, just stream. + desired_output_card_index = card_index; + } + + std::map get_available_output_video_modes() const; + + uint32_t get_output_video_mode() const { + return desired_output_video_mode; + } + + void set_output_video_mode(uint32_t mode) { + desired_output_video_mode = mode; + } + + void set_display_timecode_in_stream(bool enable) { + display_timecode_in_stream = enable; + } + + void set_display_timecode_on_stdout(bool enable) { + display_timecode_on_stdout = enable; + } + + int64_t get_num_connected_clients() const { + return httpd.get_num_connected_clients(); + } + + std::vector get_theme_menu() { return theme->get_theme_menu(); } + + void theme_menu_entry_clicked(int lua_ref) { return theme->theme_menu_entry_clicked(lua_ref); } + + void set_theme_menu_callback(std::function callback) { - should_cut = true; + theme->set_theme_menu_callback(callback); } - void reset_meters(); + void wait_for_next_frame(); private: + struct CaptureCard; + + enum class CardType { + LIVE_CARD, + FAKE_CAPTURE, + FFMPEG_INPUT, + CEF_INPUT, + }; + void configure_card(unsigned card_index, bmusb::CaptureInterface *capture, CardType card_type, DeckLinkOutput *output); + void set_output_card_internal(int card_index); // Should only be called from the mixer thread. 
void bm_frame(unsigned card_index, uint16_t timecode, - FrameAllocator::Frame video_frame, size_t video_offset, uint16_t video_format, - FrameAllocator::Frame audio_frame, size_t audio_offset, uint16_t audio_format); + bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format, + bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format); + void bm_hotplug_add(libusb_device *dev); + void bm_hotplug_remove(unsigned card_index); void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1); void thread_func(); + void handle_hotplugged_cards(); + void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame, bool is_preroll, std::chrono::steady_clock::time_point frame_timestamp); + std::string get_timecode_text() const; + void render_one_frame(int64_t duration); void audio_thread_func(); - void process_audio_one_frame(int64_t frame_pts_int, int num_samples); - void subsample_chroma(GLuint src_tex, GLuint dst_dst); void release_display_frame(DisplayFrame *frame); double pts() { return double(pts_int) / TIMEBASE; } + void trim_queue(CaptureCard *card, size_t safe_queue_length); + std::pair get_channels_json(); + std::pair get_channel_color_http(unsigned channel_idx); HTTPD httpd; - unsigned num_cards; + unsigned num_cards, num_video_inputs, num_html_inputs = 0; - QSurface *mixer_surface, *h264_encoder_surface; + QSurface *mixer_surface, *h264_encoder_surface, *decklink_output_surface; std::unique_ptr resource_pool; std::unique_ptr theme; + std::atomic audio_source_channel{0}; + std::atomic master_clock_channel{0}; // Gets overridden by if set. + int output_card_index = -1; // -1 for none. + uint32_t output_video_mode = -1; + + // The mechanics of changing the output card and modes are so intricately connected + // with the work the mixer thread is doing. 
Thus, we don't change it directly, + // we just set this variable instead, which signals to the mixer thread that + // it should do the change before the next frame. This simplifies locking + // considerations immensely. + std::atomic desired_output_card_index{-1}; + std::atomic desired_output_video_mode{0}; + std::unique_ptr display_chain; - GLuint cbcr_program_num; // Owned by . - std::unique_ptr h264_encoder; + std::unique_ptr chroma_subsampler; + std::unique_ptr v210_converter; + std::unique_ptr video_encoder; + + std::unique_ptr timecode_renderer; + std::atomic display_timecode_in_stream{false}; + std::atomic display_timecode_on_stdout{false}; // Effects part of . Owned by . - movit::FlatInput *display_input; + movit::YCbCrInput *display_input; int64_t pts_int = 0; // In TIMEBASE units. - std::mutex bmusb_mutex; + mutable std::mutex frame_num_mutex; + std::condition_variable frame_num_updated; + unsigned frame_num = 0; // Under . + + // Accumulated errors in number of 1/TIMEBASE audio samples. If OUTPUT_FREQUENCY divided by + // frame rate is integer, will always stay zero. + unsigned fractional_samples = 0; + + mutable std::mutex card_mutex; + bool has_bmusb_thread = false; struct CaptureCard { - BMUSBCapture *usb; + std::unique_ptr capture; + bool is_fake_capture; + CardType type; + std::unique_ptr output; + + // CEF only delivers frames when it actually has a change. + // If we trim the queue for latency reasons, we could thus + // end up in a situation trimming a frame that was meant to + // be displayed for a long time, which is really suboptimal. + // Thus, if we drop the last frame we have, may_have_dropped_last_frame + // is set to true, and the next starvation event will trigger + // us requesting a CEF repaint. + bool is_cef_capture, may_have_dropped_last_frame = false; + + // If this card is used for output (ie., output_card_index points to it), + // it cannot simultaneously be used for capture, so gets replaced + // by a FakeCapture.
However, since reconstructing the real capture object + // with all its state can be annoying, it is not being deleted, just stopped + // and moved here. + std::unique_ptr parked_capture; + std::unique_ptr frame_allocator; // Stuff for the OpenGL context (for texture uploading). - QSurface *surface; - QOpenGLContext *context; - - bool new_data_ready = false; // Whether new_frame contains anything. + QSurface *surface = nullptr; + + struct NewFrame { + RefCountedFrame frame; + int64_t length; // In TIMEBASE units. + bool interlaced; + unsigned field; // Which field (0 or 1) of the frame to use. Always 0 for progressive. + std::function upload_func; // Needs to be called to actually upload the texture to OpenGL. + unsigned dropped_frames = 0; // Number of dropped frames before this one. + std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min(); + }; + std::deque new_frames; bool should_quit = false; - RefCountedFrame new_frame; - int64_t new_frame_length; // In TIMEBASE units. - bool new_frame_interlaced; - unsigned new_frame_field; // Which field (0 or 1) of the frame to use. Always 0 for progressive. - GLsync new_data_ready_fence; // Whether new_frame is ready for rendering. - std::condition_variable new_data_ready_changed; // Set whenever new_data_ready is changed. - unsigned dropped_frames = 0; // Before new_frame. - - // Accumulated errors in number of 1/TIMEBASE samples. If OUTPUT_FREQUENCY divided by - // frame rate is integer, will always stay zero. - unsigned fractional_samples = 0; - - std::mutex audio_mutex; - std::unique_ptr resampling_queue; // Under audio_mutex. + std::condition_variable new_frames_changed; // Set whenever new_frames (or should_quit) is changed. + + QueueLengthPolicy queue_length_policy; // Refers to the "new_frames" queue. + int last_timecode = -1; // Unwrapped. - int64_t next_local_pts = 0; // Beginning of next frame, in TIMEBASE units. + + JitterHistory jitter_history; + + // Metrics. 
+ std::vector> labels; + std::atomic metric_input_received_frames{0}; + std::atomic metric_input_duped_frames{0}; + std::atomic metric_input_dropped_frames_jitter{0}; + std::atomic metric_input_dropped_frames_error{0}; + std::atomic metric_input_resets{0}; + std::atomic metric_input_queue_length_frames{0}; + + std::atomic metric_input_has_signal_bool{-1}; + std::atomic metric_input_is_connected_bool{-1}; + std::atomic metric_input_interlaced_bool{-1}; + std::atomic metric_input_width_pixels{-1}; + std::atomic metric_input_height_pixels{-1}; + std::atomic metric_input_frame_rate_nom{-1}; + std::atomic metric_input_frame_rate_den{-1}; + std::atomic metric_input_sample_rate_hz{-1}; + }; + JitterHistory output_jitter_history; + CaptureCard cards[MAX_VIDEO_CARDS]; // Protected by . + YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS]; // Protected by . + std::unique_ptr audio_mixer; // Same as global_audio_mixer (see audio_mixer.h). + bool input_card_is_master_clock(unsigned card_index, unsigned master_card_index) const; + struct OutputFrameInfo { + int dropped_frames; // Since last frame. + int num_samples; // Audio samples needed for this output frame. + int64_t frame_duration; // In TIMEBASE units. + bool is_preroll; + std::chrono::steady_clock::time_point frame_timestamp; }; - CaptureCard cards[MAX_CARDS]; // protected by + OutputFrameInfo get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS]); InputState input_state; + // Cards we have been notified about being hotplugged, but haven't tried adding yet. + // Protected by its own mutex.
+ std::mutex hotplug_mutex; + std::vector hotplugged_cards; + class OutputChannel { public: ~OutputChannel(); - void output_frame(DisplayFrame frame); + void output_frame(DisplayFrame &&frame); bool get_display_frame(DisplayFrame *frame); - void set_frame_ready_callback(new_frame_ready_callback_t callback); + void add_frame_ready_callback(void *key, new_frame_ready_callback_t callback); + void remove_frame_ready_callback(void *key); + void set_transition_names_updated_callback(transition_names_updated_callback_t callback); + void set_name_updated_callback(name_updated_callback_t callback); + void set_color_updated_callback(color_updated_callback_t callback); private: friend class Mixer; + unsigned channel; Mixer *parent = nullptr; // Not owned. std::mutex frame_mutex; DisplayFrame current_frame, ready_frame; // protected by bool has_current_frame = false, has_ready_frame = false; // protected by - new_frame_ready_callback_t new_frame_ready_callback; - bool has_new_frame_ready_callback = false; + std::map new_frame_ready_callbacks; // protected by + transition_names_updated_callback_t transition_names_updated_callback; + name_updated_callback_t name_updated_callback; + color_updated_callback_t color_updated_callback; + + std::vector last_transition_names; + std::string last_name, last_color; }; OutputChannel output_channel[NUM_OUTPUTS]; @@ -272,40 +614,23 @@ private: std::atomic should_quit{false}; std::atomic should_cut{false}; - audio_level_callback_t audio_level_callback = nullptr; - std::mutex compressor_mutex; - Ebu_r128_proc r128; // Under compressor_mutex. - - Resampler peak_resampler; - std::atomic peak{0.0f}; - - StereoFilter locut; // Default cutoff 150 Hz, 24 dB/oct. - std::atomic locut_cutoff_hz; - - // First compressor; takes us up to about -12 dBFS. - std::mutex level_compressor_mutex; - StereoCompressor level_compressor; // Under compressor_mutex. Used to set/override gain_staging_db if . - float gain_staging_db = 0.0f; // Under compressor_mutex. 
- bool level_compressor_enabled = true; // Under compressor_mutex. - - static constexpr float ref_level_dbfs = -14.0f; - - StereoCompressor limiter; - std::atomic limiter_threshold_dbfs{ref_level_dbfs + 4.0f}; // 4 dB. - std::atomic limiter_enabled{true}; - StereoCompressor compressor; - std::atomic compressor_threshold_dbfs{ref_level_dbfs - 12.0f}; // -12 dB. - std::atomic compressor_enabled{true}; - std::unique_ptr alsa; struct AudioTask { int64_t pts_int; int num_samples; + bool adjust_rate; + std::chrono::steady_clock::time_point frame_timestamp; }; std::mutex audio_mutex; std::condition_variable audio_task_queue_changed; std::queue audio_task_queue; // Under audio_mutex. + + // For mode scanning. + bool is_mode_scanning[MAX_VIDEO_CARDS]{ false }; + std::vector mode_scanlist[MAX_VIDEO_CARDS]; + unsigned mode_scanlist_index[MAX_VIDEO_CARDS]{ 0 }; + std::chrono::steady_clock::time_point last_mode_scan_change[MAX_VIDEO_CARDS]; }; extern Mixer *global_mixer;