#include <assert.h>
#include <epoxy/gl.h>
+
#undef Success
-#include <movit/effect_chain.h>
-#include <movit/flat_input.h>
#include <stdbool.h>
#include <stdint.h>
-#include <zita-resampler/resampler.h>
-
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <thread>
#include <vector>
-#include "alsa_output.h"
+#include <movit/image_format.h>
+
#include "audio_mixer.h"
#include "bmusb/bmusb.h"
-#include "correlation_measurer.h"
#include "defs.h"
-#include "ebu_r128_proc.h"
#include "httpd.h"
#include "input_state.h"
+#include "libusb.h"
#include "pbo_frame_allocator.h"
#include "ref_counted_frame.h"
#include "ref_counted_gl_sync.h"
-#include "resampling_queue.h"
#include "theme.h"
#include "timebase.h"
-#include "stereocompressor.h"
#include "video_encoder.h"
+#include "ycbcr_interpretation.h"
class ALSAOutput;
+class ChromaSubsampler;
+class DeckLinkOutput;
class QSurface;
-class QuickSyncEncoder;
+class QSurfaceFormat;
+class TimecodeRenderer;
+class v210Converter;
+
namespace movit {
class Effect;
class EffectChain;
-class FlatInput;
class ResourcePool;
+class YCbCrInput;
} // namespace movit
-namespace movit {
-class YCbCrInput;
-}
-class QSurfaceFormat;
+// A class to estimate the future jitter. Used in QueueLengthPolicy (see below).
+//
+// There are many ways to estimate jitter; I've tested a few ones (and also
+// some algorithms that don't explicitly model jitter) with different
+// parameters on some real-life data in experiments/queue_drop_policy.cpp.
+// This is one based on simple order statistics where I've added some margin in
+// the number of starvation events; I believe that about one every hour would
+// probably be acceptable, but this one typically goes lower than that, at the
+// cost of 2–3 ms extra latency. (If the queue is hard-limited to one frame, it's
+// possible to get ~10 ms further down, but this would mean framedrops every
+// second or so.) The general strategy is: Take the 99.9-percentile jitter over
+// last 5000 frames, multiply by two, and that's our worst-case jitter
+// estimate. The fact that we're not using the max value means that we could
+// actually even throw away very late frames immediately, which means we only
+// get one user-visible event instead of seeing something both when the frame
+// arrives late (duplicate frame) and then again when we drop.
+class JitterHistory {
+private:
+ static constexpr size_t history_length = 5000;
+ static constexpr double percentile = 0.999;
+ static constexpr double multiplier = 2.0;
+
+public:
+ void register_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
+ void unregister_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
+
+ void clear() {
+ history.clear();
+ orders.clear();
+ }
+ void frame_arrived(std::chrono::steady_clock::time_point now, int64_t frame_duration, size_t dropped_frames);
+ std::chrono::steady_clock::time_point get_expected_next_frame() const { return expected_timestamp; }
+ double estimate_max_jitter() const;
+
+private:
+ // A simple O(k) based algorithm for getting the k-th largest or
+ // smallest element from our window; we simply keep the multiset
+	// ordered (insertions and deletions are O(log n) as always) and then
+ // iterate from one of the sides. If we had larger values of k,
+ // we could go for a more complicated setup with two sets or heaps
+ // (one increasing and one decreasing) that we keep balanced around
+ // the point, or it is possible to reimplement std::set with
+ // counts in each node. However, since k=5, we don't need this.
+ std::multiset<double> orders;
+ std::deque<std::multiset<double>::iterator> history;
+
+ std::chrono::steady_clock::time_point expected_timestamp = std::chrono::steady_clock::time_point::min();
+
+ // Metrics. There are no direct summaries for jitter, since we already have latency summaries.
+ std::atomic<int64_t> metric_input_underestimated_jitter_frames{0};
+ std::atomic<double> metric_input_estimated_max_jitter_seconds{0.0 / 0.0};
+};
// For any card that's not the master (where we pick out the frames as they
// come, as fast as we can process), there's going to be a queue. The question
// 2. We don't want to add more delay than is needed.
//
// Our general strategy is to drop as many frames as we can (helping for #2)
-// that we think is safe for #1 given jitter. To this end, we set a lower floor N,
-// where we assume that if we have N frames in the queue, we're always safe from
-// starvation. (Typically, N will be 0 or 1. It starts off at 0.) If we have
-// more than N frames in the queue after reading out the one we need, we head-drop
-// them to reduce the queue.
-//
-// N is reduced as follows: If the queue has had at least one spare frame for
-// at least 50 (master) frames (ie., it's been too conservative for a second),
-// we reduce N by 1 and reset the timers. TODO: Only do this if N ever actually
-// touched the limit.
+// that we think is safe for #1 given jitter. To this end, we measure the
+// deviation from the expected arrival time for all cards, and use that for
+// continuous jitter estimation.
//
-// Whenever the queue is starved (we needed a frame but there was none),
-// and we've been at N since the last starvation, N was obviously too low,
-// so we increment it. We will never set N above 5, though.
+// We then drop everything from the queue that we're sure we won't need to
+// serve the output in the time before the next frame arrives. Typically,
+// this means the queue will contain 0 or 1 frames, although more is also
+// possible if the jitter is very high.
class QueueLengthPolicy {
public:
QueueLengthPolicy() {}
void reset(unsigned card_index) {
this->card_index = card_index;
- safe_queue_length = 0;
- frames_with_at_least_one = 0;
- been_at_safe_point_since_last_starvation = false;
}
- void update_policy(int queue_length); // Give in -1 for starvation.
+ void register_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
+ void unregister_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
+
+ // Call after picking out a frame, so 0 means starvation.
+ void update_policy(std::chrono::steady_clock::time_point now,
+ std::chrono::steady_clock::time_point expected_next_frame,
+ int64_t input_frame_duration,
+ int64_t master_frame_duration,
+ double max_input_card_jitter_seconds,
+ double max_master_card_jitter_seconds);
unsigned get_safe_queue_length() const { return safe_queue_length; }
private:
- unsigned card_index; // For debugging only.
- unsigned safe_queue_length = 0; // Called N in the comments.
- unsigned frames_with_at_least_one = 0;
- bool been_at_safe_point_since_last_starvation = false;
+ unsigned card_index; // For debugging and metrics only.
+ unsigned safe_queue_length = 0; // Can never go below zero.
+
+ // Metrics.
+ std::atomic<int64_t> metric_input_queue_safe_length_frames{1};
};
class Mixer {
return output_channel[output].get_display_frame(frame);
}
+ // NOTE: Callbacks will be called with a mutex held, so you should probably
+ // not do real work in them.
typedef std::function<void()> new_frame_ready_callback_t;
- void set_frame_ready_callback(Output output, new_frame_ready_callback_t callback)
+ void add_frame_ready_callback(Output output, void *key, new_frame_ready_callback_t callback)
{
- output_channel[output].set_frame_ready_callback(callback);
+ output_channel[output].add_frame_ready_callback(key, callback);
+ }
+
+ void remove_frame_ready_callback(Output output, void *key)
+ {
+ output_channel[output].remove_frame_ready_callback(key);
}
// TODO: Should this really be per-channel? Shouldn't it just be called for e.g. the live output?
output_channel[output].set_color_updated_callback(callback);
}
- typedef std::function<void(float level_lufs, float peak_db,
- float global_level_lufs, float range_low_lufs, float range_high_lufs,
- float gain_staging_db, float final_makeup_gain_db,
- float correlation)> audio_level_callback_t;
- void set_audio_level_callback(audio_level_callback_t callback)
- {
- audio_level_callback = callback;
- }
-
std::vector<std::string> get_transition_names()
{
return theme->get_transition_names(pts());
return theme->map_signal(channel);
}
- unsigned get_audio_source() const
- {
- return audio_source_channel;
- }
-
- void set_audio_source(unsigned channel)
- {
- audio_source_channel = channel;
- }
-
unsigned get_master_clock() const
{
return master_clock_channel;
return theme->set_signal_mapping(signal, card);
}
+ YCbCrInterpretation get_input_ycbcr_interpretation(unsigned card_index) const;
+ void set_input_ycbcr_interpretation(unsigned card_index, const YCbCrInterpretation &interpretation);
+
bool get_supports_set_wb(unsigned channel) const
{
return theme->get_supports_set_wb(channel);
theme->set_wb(channel, r, g, b);
}
+ // Note: You can also get this through the global variable global_audio_mixer.
AudioMixer *get_audio_mixer() { return &audio_mixer; }
const AudioMixer *get_audio_mixer() const { return &audio_mixer; }
should_cut = true;
}
- void reset_meters();
-
unsigned get_num_cards() const { return num_cards; }
std::string get_card_description(unsigned card_index) const {
return cards[card_index].capture->get_description();
}
+ // The difference between this and the previous function is that if a card
+ // is used as the current output, get_card_description() will return the
+ // fake card that's replacing it for input, whereas this function will return
+ // the card's actual name.
+ std::string get_output_card_description(unsigned card_index) const {
+ assert(card_can_be_used_as_output(card_index));
+ assert(card_index < num_cards);
+ if (cards[card_index].parked_capture) {
+ return cards[card_index].parked_capture->get_description();
+ } else {
+ return cards[card_index].capture->get_description();
+ }
+ }
+
+ bool card_can_be_used_as_output(unsigned card_index) const {
+ assert(card_index < num_cards);
+ return cards[card_index].output != nullptr;
+ }
+
std::map<uint32_t, bmusb::VideoMode> get_available_video_modes(unsigned card_index) const {
assert(card_index < num_cards);
return cards[card_index].capture->get_available_video_modes();
video_encoder->change_x264_bitrate(rate_kbit);
}
+ int get_output_card_index() const { // -1 = no output, just stream.
+ return desired_output_card_index;
+ }
+
+ void set_output_card(int card_index) { // -1 = no output, just stream.
+ desired_output_card_index = card_index;
+ }
+
+ std::map<uint32_t, bmusb::VideoMode> get_available_output_video_modes() const;
+
+ uint32_t get_output_video_mode() const {
+ return desired_output_video_mode;
+ }
+
+ void set_output_video_mode(uint32_t mode) {
+ desired_output_video_mode = mode;
+ }
+
+ void set_display_timecode_in_stream(bool enable) {
+ display_timecode_in_stream = enable;
+ }
+
+ void set_display_timecode_on_stdout(bool enable) {
+ display_timecode_on_stdout = enable;
+ }
+
+ int64_t get_num_connected_clients() const {
+ return httpd.get_num_connected_clients();
+ }
+
private:
- void configure_card(unsigned card_index, bmusb::CaptureInterface *capture, bool is_fake_capture);
+ struct CaptureCard;
+
+ enum class CardType {
+ LIVE_CARD,
+ FAKE_CAPTURE,
+ FFMPEG_INPUT
+ };
+ void configure_card(unsigned card_index, bmusb::CaptureInterface *capture, CardType card_type, DeckLinkOutput *output);
+ void set_output_card_internal(int card_index); // Should only be called from the mixer thread.
void bm_frame(unsigned card_index, uint16_t timecode,
bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format);
void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
void thread_func();
void handle_hotplugged_cards();
- void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame);
+ void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame, bool is_preroll, std::chrono::steady_clock::time_point frame_timestamp);
+ std::string get_timecode_text() const;
void render_one_frame(int64_t duration);
- void send_audio_level_callback();
void audio_thread_func();
- void process_audio_one_frame(int64_t frame_pts_int, int num_samples, ResamplingQueue::RateAdjustmentPolicy rate_adjustment_policy);
- void subsample_chroma(GLuint src_tex, GLuint dst_dst);
void release_display_frame(DisplayFrame *frame);
double pts() { return double(pts_int) / TIMEBASE; }
+ void trim_queue(CaptureCard *card, size_t safe_queue_length);
+ std::pair<std::string, std::string> get_channels_json();
+ std::pair<std::string, std::string> get_channel_color_http(unsigned channel_idx);
HTTPD httpd;
- unsigned num_cards;
+ unsigned num_cards, num_video_inputs;
- QSurface *mixer_surface, *h264_encoder_surface;
+ QSurface *mixer_surface, *h264_encoder_surface, *decklink_output_surface;
std::unique_ptr<movit::ResourcePool> resource_pool;
std::unique_ptr<Theme> theme;
std::atomic<unsigned> audio_source_channel{0};
- std::atomic<unsigned> master_clock_channel{0};
+ std::atomic<int> master_clock_channel{0}; // Gets overridden by <output_card_index> if set.
+ int output_card_index = -1; // -1 for none.
+ uint32_t output_video_mode = -1;
+
+ // The mechanics of changing the output card and modes are so intricately connected
+	// with the work the mixer thread is doing. Thus, we don't change it directly;
+ // we just set this variable instead, which signals to the mixer thread that
+ // it should do the change before the next frame. This simplifies locking
+ // considerations immensely.
+ std::atomic<int> desired_output_card_index{-1};
+ std::atomic<uint32_t> desired_output_video_mode{0};
+
std::unique_ptr<movit::EffectChain> display_chain;
- GLuint cbcr_program_num; // Owned by <resource_pool>.
- GLuint cbcr_vbo; // Holds position and texcoord data.
- GLuint cbcr_position_attribute_index, cbcr_texcoord_attribute_index;
+ std::unique_ptr<ChromaSubsampler> chroma_subsampler;
+ std::unique_ptr<v210Converter> v210_converter;
std::unique_ptr<VideoEncoder> video_encoder;
+ std::unique_ptr<TimecodeRenderer> timecode_renderer;
+ std::atomic<bool> display_timecode_in_stream{false};
+ std::atomic<bool> display_timecode_on_stdout{false};
+
// Effects part of <display_chain>. Owned by <display_chain>.
- movit::FlatInput *display_input;
+ movit::YCbCrInput *display_input;
int64_t pts_int = 0; // In TIMEBASE units.
+ unsigned frame_num = 0;
+
+ // Accumulated errors in number of 1/TIMEBASE audio samples. If OUTPUT_FREQUENCY divided by
+ // frame rate is integer, will always stay zero.
+ unsigned fractional_samples = 0;
- std::mutex bmusb_mutex;
+ mutable std::mutex card_mutex;
bool has_bmusb_thread = false;
struct CaptureCard {
- bmusb::CaptureInterface *capture = nullptr;
+ std::unique_ptr<bmusb::CaptureInterface> capture;
bool is_fake_capture;
+ CardType type;
+ std::unique_ptr<DeckLinkOutput> output;
+
+ // If this card is used for output (ie., output_card_index points to it),
+	// it cannot simultaneously be used for capture, so <capture> gets replaced
+ // by a FakeCapture. However, since reconstructing the real capture object
+ // with all its state can be annoying, it is not being deleted, just stopped
+ // and moved here.
+ std::unique_ptr<bmusb::CaptureInterface> parked_capture;
+
std::unique_ptr<PBOFrameAllocator> frame_allocator;
// Stuff for the OpenGL context (for texture uploading).
unsigned field; // Which field (0 or 1) of the frame to use. Always 0 for progressive.
std::function<void()> upload_func; // Needs to be called to actually upload the texture to OpenGL.
unsigned dropped_frames = 0; // Number of dropped frames before this one.
+ std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
};
- std::queue<NewFrame> new_frames;
+ std::deque<NewFrame> new_frames;
bool should_quit = false;
std::condition_variable new_frames_changed; // Set whenever new_frames (or should_quit) is changed.
QueueLengthPolicy queue_length_policy; // Refers to the "new_frames" queue.
- // Accumulated errors in number of 1/TIMEBASE samples. If OUTPUT_FREQUENCY divided by
- // frame rate is integer, will always stay zero.
- unsigned fractional_samples = 0;
-
int last_timecode = -1; // Unwrapped.
+
+ JitterHistory jitter_history;
+
+ // Metrics.
+ std::vector<std::pair<std::string, std::string>> labels;
+ std::atomic<int64_t> metric_input_received_frames{0};
+ std::atomic<int64_t> metric_input_duped_frames{0};
+ std::atomic<int64_t> metric_input_dropped_frames_jitter{0};
+ std::atomic<int64_t> metric_input_dropped_frames_error{0};
+ std::atomic<int64_t> metric_input_resets{0};
+ std::atomic<int64_t> metric_input_queue_length_frames{0};
+
+ std::atomic<int64_t> metric_input_has_signal_bool{-1};
+ std::atomic<int64_t> metric_input_is_connected_bool{-1};
+ std::atomic<int64_t> metric_input_interlaced_bool{-1};
+ std::atomic<int64_t> metric_input_width_pixels{-1};
+ std::atomic<int64_t> metric_input_height_pixels{-1};
+ std::atomic<int64_t> metric_input_frame_rate_nom{-1};
+ std::atomic<int64_t> metric_input_frame_rate_den{-1};
+ std::atomic<int64_t> metric_input_sample_rate_hz{-1};
};
- CaptureCard cards[MAX_VIDEO_CARDS]; // protected by <bmusb_mutex>
- AudioMixer audio_mixer;
- void get_one_frame_from_each_card(unsigned master_card_index, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], int num_samples[MAX_VIDEO_CARDS]);
+ JitterHistory output_jitter_history;
+ CaptureCard cards[MAX_VIDEO_CARDS]; // Protected by <card_mutex>.
+ YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS]; // Protected by <card_mutex>.
+ AudioMixer audio_mixer; // Same as global_audio_mixer (see audio_mixer.h).
+ bool input_card_is_master_clock(unsigned card_index, unsigned master_card_index) const;
+ struct OutputFrameInfo {
+ int dropped_frames; // Since last frame.
+ int num_samples; // Audio samples needed for this output frame.
+ int64_t frame_duration; // In TIMEBASE units.
+ bool is_preroll;
+ std::chrono::steady_clock::time_point frame_timestamp;
+ };
+ OutputFrameInfo get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS]);
InputState input_state;
class OutputChannel {
public:
~OutputChannel();
- void output_frame(DisplayFrame frame);
+ void output_frame(DisplayFrame &&frame);
bool get_display_frame(DisplayFrame *frame);
- void set_frame_ready_callback(new_frame_ready_callback_t callback);
+ void add_frame_ready_callback(void *key, new_frame_ready_callback_t callback);
+ void remove_frame_ready_callback(void *key);
void set_transition_names_updated_callback(transition_names_updated_callback_t callback);
void set_name_updated_callback(name_updated_callback_t callback);
void set_color_updated_callback(color_updated_callback_t callback);
std::mutex frame_mutex;
DisplayFrame current_frame, ready_frame; // protected by <frame_mutex>
bool has_current_frame = false, has_ready_frame = false; // protected by <frame_mutex>
- new_frame_ready_callback_t new_frame_ready_callback;
+ std::map<void *, new_frame_ready_callback_t> new_frame_ready_callbacks; // protected by <frame_mutex>
transition_names_updated_callback_t transition_names_updated_callback;
name_updated_callback_t name_updated_callback;
color_updated_callback_t color_updated_callback;
std::atomic<bool> should_quit{false};
std::atomic<bool> should_cut{false};
- audio_level_callback_t audio_level_callback = nullptr;
- mutable std::mutex audio_measure_mutex;
- Ebu_r128_proc r128; // Under audio_measure_mutex.
- CorrelationMeasurer correlation; // Under audio_measure_mutex.
- Resampler peak_resampler; // Under audio_measure_mutex.
- std::atomic<float> peak{0.0f};
-
std::unique_ptr<ALSAOutput> alsa;
struct AudioTask {
int64_t pts_int;
int num_samples;
bool adjust_rate;
+ std::chrono::steady_clock::time_point frame_timestamp;
};
std::mutex audio_mutex;
std::condition_variable audio_task_queue_changed;
};
extern Mixer *global_mixer;
-extern bool uses_mlock;
#endif // !defined(_MIXER_H)