#ifndef _MIXER_H
#define _MIXER_H 1

// The actual video mixer, running in its own separate background thread.

#include <assert.h>
#include <epoxy/gl.h>

#undef Success

#include <stdbool.h>
#include <stdint.h>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <deque>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <set>
#include <string>
#include <thread>
#include <vector>

#include <movit/image_format.h>

#include "audio_mixer.h"
#include "bmusb/bmusb.h"
#include "defs.h"
#include "httpd.h"
#include "input_state.h"
#include "libusb.h"
#include "pbo_frame_allocator.h"
#include "ref_counted_frame.h"
#include "ref_counted_gl_sync.h"
#include "theme.h"
#include "timebase.h"
#include "video_encoder.h"
#include "ycbcr_interpretation.h"

class ALSAOutput;
class ChromaSubsampler;
class DeckLinkOutput;
class QSurface;
class QSurfaceFormat;
class TimecodeRenderer;
class v210Converter;

namespace movit {
class Effect;
class EffectChain;
class ResourcePool;
class YCbCrInput;
}  // namespace movit

// A class to estimate future jitter. Used in QueueLengthPolicy (see below).
//
// There are many ways to estimate jitter; I've tested a few (and also
// some algorithms that don't explicitly model jitter) with different
// parameters on some real-life data in experiments/queue_drop_policy.cpp.
// This is one based on simple order statistics where I've added some margin in
// the number of starvation events; I believe that about one every hour would
// probably be acceptable, but this one typically goes lower than that, at the
// cost of 2–3 ms extra latency. (If the queue is hard-limited to one frame, it's
// possible to get ~10 ms further down, but this would mean frame drops every
// second or so.) The general strategy is: Take the 99.9-percentile jitter over
// the last 5000 frames, multiply by two, and that's our worst-case jitter
// estimate. Since we're not using the max value, we can actually throw away
// very late frames immediately, so we get only one user-visible event
// instead of seeing something both when the frame arrives late (a duplicated
// frame) and again when we drop.
class JitterHistory {
private:
	static constexpr size_t history_length = 5000;
	static constexpr double percentile = 0.999;
	static constexpr double multiplier = 2.0;

public:
	void register_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
	void unregister_metrics(const std::vector<std::pair<std::string, std::string>> &labels);

	void clear() {
		history.clear();
		orders.clear();
	}
	void frame_arrived(std::chrono::steady_clock::time_point now, int64_t frame_duration, size_t dropped_frames);
	std::chrono::steady_clock::time_point get_expected_next_frame() const { return expected_timestamp; }
	double estimate_max_jitter() const;

private:
	// A simple O(k) algorithm for getting the k-th largest or
	// smallest element from our window; we simply keep the multiset
	// ordered (insertions and deletions are O(log n) as always) and then
	// iterate from one of the sides. If we had larger values of k,
	// we could go for a more complicated setup with two sets or heaps
	// (one increasing and one decreasing) that we keep balanced around
	// the point, or it is possible to reimplement std::set with
	// counts in each node. However, since k=5, we don't need this.
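	//
	// As a rough illustration only (the real implementation is elsewhere and
	// may differ), the k-th-largest lookup described above could look
	// something like this:
	//
	//   double JitterHistory::estimate_max_jitter() const
	//   {
	//           if (orders.empty()) {
	//                   return 0.0;
	//           }
	//           size_t k = size_t((1.0 - percentile) * orders.size());
	//           auto it = orders.crbegin();
	//           std::advance(it, k);  // Walk k elements in from the largest side.
	//           return multiplier * *it;
	//   }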
	std::multiset<double> orders;
	std::deque<std::multiset<double>::iterator> history;

	std::chrono::steady_clock::time_point expected_timestamp = std::chrono::steady_clock::time_point::min();

	// Metrics. There are no direct summaries for jitter, since we already have latency summaries.
	std::atomic<int64_t> metric_input_underestimated_jitter_frames{0};
	std::atomic<double> metric_input_estimated_max_jitter_seconds{0.0 / 0.0};
};
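
// A minimal sketch (not the actual calling code, which may differ) of how
// JitterHistory is meant to be fed and queried; <now> and <frame_length> are
// placeholders:
//
//   card->jitter_history.frame_arrived(now, frame_length, new_frame.dropped_frames);
//   ...
//   double input_jitter = card->jitter_history.estimate_max_jitter();
//   // <input_jitter> and the master card's jitter estimate are then passed to
//   // QueueLengthPolicy::update_policy() below to decide how much queue to keep.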

// For any card that's not the master (where we pick out the frames as they
// come, as fast as we can process them), there's going to be a queue. The
// question is when we should drop frames from that queue (apart from the
// obvious dropping if the 16-frame queue should become full), especially given
// that the frame rate could be lower or higher than the master's (either subtly
// or dramatically). We have two (conflicting) demands:
//
//   1. We want to avoid starving the queue.
//   2. We don't want to add more delay than is needed.
//
// Our general strategy is to drop as many frames as we think is safe for #1
// given the jitter (which helps #2). To this end, we measure the deviation
// from the expected arrival time for all cards, and use that for continuous
// jitter estimation.
//
// We then drop everything from the queue that we're sure we won't need to
// serve the output in the time before the next frame arrives. Typically,
// this means the queue will contain 0 or 1 frames, although more is also
// possible if the jitter is very high.
class QueueLengthPolicy {
public:
	QueueLengthPolicy() {}
	void reset(unsigned card_index) {
		this->card_index = card_index;
	}

	void register_metrics(const std::vector<std::pair<std::string, std::string>> &labels);
	void unregister_metrics(const std::vector<std::pair<std::string, std::string>> &labels);

	// Call after picking out a frame, so 0 means starvation.
	void update_policy(std::chrono::steady_clock::time_point now,
	                   std::chrono::steady_clock::time_point expected_next_frame,
	                   int64_t master_frame_duration,
	                   double max_input_card_jitter_seconds,
	                   double max_master_card_jitter_seconds);
	unsigned get_safe_queue_length() const { return safe_queue_length; }

private:
	unsigned card_index;  // For debugging and metrics only.
	unsigned safe_queue_length = 0;  // Can never go below zero.

	// Metrics.
	std::atomic<int64_t> metric_input_queue_safe_length_frames{1};
};
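
// A hedged sketch of how the two classes above are intended to interact in the
// mixer thread (the actual logic is more involved and may differ in detail);
// <master_card_jitter_seconds> comes from the master card's, or the output's,
// JitterHistory:
//
//   card->queue_length_policy.update_policy(
//           now, card->jitter_history.get_expected_next_frame(),
//           master_frame_duration,
//           card->jitter_history.estimate_max_jitter(),
//           master_card_jitter_seconds);
//   trim_queue(card, card->queue_length_policy.get_safe_queue_length());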

class Mixer {
public:
	// The surface format is used for offscreen destinations for OpenGL contexts we need.
	Mixer(const QSurfaceFormat &format, unsigned num_cards);
	~Mixer();
	void start();
	void quit();

	void transition_clicked(int transition_num);
	void channel_clicked(int preview_num);

	enum Output {
		OUTPUT_LIVE = 0,
		OUTPUT_PREVIEW,
		OUTPUT_INPUT0,  // 1, 2, 3, up to 15 follow numerically.
		NUM_OUTPUTS = 18
	};
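	// Since the inputs follow OUTPUT_INPUT0 numerically, the output channel
	// for input i can be written as Output(OUTPUT_INPUT0 + i), for i in [0, 15].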

	struct DisplayFrame {
		// The chain for rendering this frame. To render a display frame,
		// first wait for <ready_fence>, then call <setup_chain>
		// to wire up all the inputs, and then finally call
		// chain->render_to_screen() or similar.
		movit::EffectChain *chain;
		std::function<void()> setup_chain;

		// Asserted when all the inputs are ready; you cannot render the chain
		// before this.
		RefCountedGLsync ready_fence;

		// Holds on to all the input frames needed for this display frame,
		// so they are not released while still rendering.
		std::vector<RefCountedFrame> input_frames;

		// Textures that should be released back to the resource pool
		// when this frame disappears, if any.
		// TODO: Refcount these as well?
		std::vector<GLuint> temp_textures;
	};
	// Implicitly frees the previous one if there's a new frame available.
	bool get_display_frame(Output output, DisplayFrame *frame) {
		return output_channel[output].get_display_frame(frame);
	}
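	//
	// A minimal consumer-side sketch, assuming a current OpenGL context and
	// that RefCountedGLsync exposes the underlying GLsync via get():
	//
	//   Mixer::DisplayFrame frame;
	//   if (global_mixer->get_display_frame(Mixer::OUTPUT_PREVIEW, &frame)) {
	//           glWaitSync(frame.ready_fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
	//           frame.setup_chain();
	//           frame.chain->render_to_screen();
	//   }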

	// NOTE: Callbacks will be called with a mutex held, so you should probably
	// not do real work in them.
	typedef std::function<void()> new_frame_ready_callback_t;
	void add_frame_ready_callback(Output output, void *key, new_frame_ready_callback_t callback)
	{
		output_channel[output].add_frame_ready_callback(key, callback);
	}

	void remove_frame_ready_callback(Output output, void *key)
	{
		output_channel[output].remove_frame_ready_callback(key);
	}
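	//
	// Since the callback runs with a mutex held, a safe pattern is to have it
	// merely signal other code and return, e.g. (sketch only, assuming a Qt
	// widget registering itself as the key):
	//
	//   global_mixer->add_frame_ready_callback(Mixer::OUTPUT_LIVE, this, [this]{
	//           QMetaObject::invokeMethod(this, "update", Qt::QueuedConnection);
	//   });
	//   ...
	//   global_mixer->remove_frame_ready_callback(Mixer::OUTPUT_LIVE, this);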

	// TODO: Should this really be per-channel? Shouldn't it just be called for e.g. the live output?
	typedef std::function<void(const std::vector<std::string> &)> transition_names_updated_callback_t;
	void set_transition_names_updated_callback(Output output, transition_names_updated_callback_t callback)
	{
		output_channel[output].set_transition_names_updated_callback(callback);
	}

	typedef std::function<void(const std::string &)> name_updated_callback_t;
	void set_name_updated_callback(Output output, name_updated_callback_t callback)
	{
		output_channel[output].set_name_updated_callback(callback);
	}

	typedef std::function<void(const std::string &)> color_updated_callback_t;
	void set_color_updated_callback(Output output, color_updated_callback_t callback)
	{
		output_channel[output].set_color_updated_callback(callback);
	}

	std::vector<std::string> get_transition_names()
	{
		return theme->get_transition_names(pts());
	}

	unsigned get_num_channels() const
	{
		return theme->get_num_channels();
	}

	std::string get_channel_name(unsigned channel) const
	{
		return theme->get_channel_name(channel);
	}

	std::string get_channel_color(unsigned channel) const
	{
		return theme->get_channel_color(channel);
	}

	int get_channel_signal(unsigned channel) const
	{
		return theme->get_channel_signal(channel);
	}

	int map_signal(unsigned channel)
	{
		return theme->map_signal(channel);
	}

	unsigned get_master_clock() const
	{
		return master_clock_channel;
	}

	void set_master_clock(unsigned channel)
	{
		master_clock_channel = channel;
	}

	void set_signal_mapping(int signal, int card)
	{
		return theme->set_signal_mapping(signal, card);
	}

	YCbCrInterpretation get_input_ycbcr_interpretation(unsigned card_index) const;
	void set_input_ycbcr_interpretation(unsigned card_index, const YCbCrInterpretation &interpretation);

	bool get_supports_set_wb(unsigned channel) const
	{
		return theme->get_supports_set_wb(channel);
	}

	void set_wb(unsigned channel, double r, double g, double b) const
	{
		theme->set_wb(channel, r, g, b);
	}

	// Note: You can also get this through the global variable global_audio_mixer.
	AudioMixer *get_audio_mixer() { return &audio_mixer; }
	const AudioMixer *get_audio_mixer() const { return &audio_mixer; }

	void schedule_cut()
	{
		should_cut = true;
	}

	unsigned get_num_cards() const { return num_cards; }

	std::string get_card_description(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_description();
	}

	// The difference between this and the previous function is that if a card
	// is used as the current output, get_card_description() will return the
	// description of the fake card that's replacing it for input, whereas this
	// function will return the actual card's description.
	std::string get_output_card_description(unsigned card_index) const {
		assert(card_can_be_used_as_output(card_index));
		assert(card_index < num_cards);
		if (cards[card_index].parked_capture) {
			return cards[card_index].parked_capture->get_description();
		} else {
			return cards[card_index].capture->get_description();
		}
	}

	bool card_can_be_used_as_output(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].output != nullptr;
	}

	std::map<uint32_t, bmusb::VideoMode> get_available_video_modes(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_available_video_modes();
	}

	uint32_t get_current_video_mode(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_current_video_mode();
	}

	void set_video_mode(unsigned card_index, uint32_t mode) {
		assert(card_index < num_cards);
		cards[card_index].capture->set_video_mode(mode);
	}

	void start_mode_scanning(unsigned card_index);

	std::map<uint32_t, std::string> get_available_video_inputs(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_available_video_inputs();
	}

	uint32_t get_current_video_input(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_current_video_input();
	}

	void set_video_input(unsigned card_index, uint32_t input) {
		assert(card_index < num_cards);
		cards[card_index].capture->set_video_input(input);
	}

	std::map<uint32_t, std::string> get_available_audio_inputs(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_available_audio_inputs();
	}

	uint32_t get_current_audio_input(unsigned card_index) const {
		assert(card_index < num_cards);
		return cards[card_index].capture->get_current_audio_input();
	}

	void set_audio_input(unsigned card_index, uint32_t input) {
		assert(card_index < num_cards);
		cards[card_index].capture->set_audio_input(input);
	}

	void change_x264_bitrate(unsigned rate_kbit) {
		video_encoder->change_x264_bitrate(rate_kbit);
	}

	int get_output_card_index() const {  // -1 = no output, just stream.
		return desired_output_card_index;
	}

	void set_output_card(int card_index) { // -1 = no output, just stream.
		desired_output_card_index = card_index;
	}

	std::map<uint32_t, bmusb::VideoMode> get_available_output_video_modes() const;

	uint32_t get_output_video_mode() const {
		return desired_output_video_mode;
	}

	void set_output_video_mode(uint32_t mode) {
		desired_output_video_mode = mode;
	}

	void set_display_timecode_in_stream(bool enable) {
		display_timecode_in_stream = enable;
	}

	void set_display_timecode_on_stdout(bool enable) {
		display_timecode_on_stdout = enable;
	}

private:
	struct CaptureCard;

	enum class CardType {
		LIVE_CARD,
		FAKE_CAPTURE,
		FFMPEG_INPUT
	};
	void configure_card(unsigned card_index, bmusb::CaptureInterface *capture, CardType card_type, DeckLinkOutput *output);
	void set_output_card_internal(int card_index);  // Should only be called from the mixer thread.
	void bm_frame(unsigned card_index, uint16_t timecode,
		bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
		bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format);
	void bm_hotplug_add(libusb_device *dev);
	void bm_hotplug_remove(unsigned card_index);
	void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
	void thread_func();
	void handle_hotplugged_cards();
	void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame, bool is_preroll, std::chrono::steady_clock::time_point frame_timestamp);
	std::string get_timecode_text() const;
	void render_one_frame(int64_t duration);
	void audio_thread_func();
	void release_display_frame(DisplayFrame *frame);
	double pts() { return double(pts_int) / TIMEBASE; }
	void trim_queue(CaptureCard *card, size_t safe_queue_length);

	HTTPD httpd;
	unsigned num_cards, num_video_inputs;

	QSurface *mixer_surface, *h264_encoder_surface, *decklink_output_surface;
	std::unique_ptr<movit::ResourcePool> resource_pool;
	std::unique_ptr<Theme> theme;
	std::atomic<unsigned> audio_source_channel{0};
	std::atomic<int> master_clock_channel{0};  // Gets overridden by <output_card_index> if set.
	int output_card_index = -1;  // -1 for none.
	uint32_t output_video_mode = -1;

	// The mechanics of changing the output card and mode are intricately
	// connected with the work the mixer thread is doing, so we don't change
	// them directly; we just set these variables instead, which signals to the
	// mixer thread that it should do the change before the next frame. This
	// simplifies locking considerations immensely.
	std::atomic<int> desired_output_card_index{-1};
	std::atomic<uint32_t> desired_output_video_mode{0};
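	//
	// A sketch of the intended handoff (the real logic lives in the mixer
	// thread and may differ); at the top of each frame, it would do roughly:
	//
	//   if (desired_output_card_index != output_card_index) {
	//           set_output_card_internal(desired_output_card_index);
	//   }
	//   // ... and similarly apply desired_output_video_mode if it changed.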

	std::unique_ptr<movit::EffectChain> display_chain;
	std::unique_ptr<ChromaSubsampler> chroma_subsampler;
	std::unique_ptr<v210Converter> v210_converter;
	std::unique_ptr<VideoEncoder> video_encoder;

	std::unique_ptr<TimecodeRenderer> timecode_renderer;
	std::atomic<bool> display_timecode_in_stream{false};
	std::atomic<bool> display_timecode_on_stdout{false};

	// Effects part of <display_chain>. Owned by <display_chain>.
	movit::YCbCrInput *display_input;

	int64_t pts_int = 0;  // In TIMEBASE units.
	unsigned frame_num = 0;

	// Accumulated error, in number of 1/TIMEBASE audio samples. If OUTPUT_FREQUENCY
	// divided by the frame rate is an integer, this will always stay zero.
	unsigned fractional_samples = 0;
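	//
	// Worked example (assuming the usual 48 kHz OUTPUT_FREQUENCY from defs.h):
	// at 60 fps each frame needs exactly 48000 / 60 = 800 samples, so no error
	// accumulates; at 60000/1001 fps each frame needs 800.8 samples, and the
	// 0.8-sample remainder is what gets carried over here between frames.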

	mutable std::mutex card_mutex;
	bool has_bmusb_thread = false;
	struct CaptureCard {
		std::unique_ptr<bmusb::CaptureInterface> capture;
		bool is_fake_capture;
		CardType type;
		std::unique_ptr<DeckLinkOutput> output;

		// If this card is used for output (i.e., output_card_index points to it),
		// it cannot simultaneously be used for capture, so <capture> gets replaced
		// by a FakeCapture. However, since reconstructing the real capture object
		// with all its state can be annoying, it is not deleted, just stopped
		// and moved here.
		std::unique_ptr<bmusb::CaptureInterface> parked_capture;

		std::unique_ptr<PBOFrameAllocator> frame_allocator;

		// Stuff for the OpenGL context (for texture uploading).
		QSurface *surface = nullptr;

		struct NewFrame {
			RefCountedFrame frame;
			int64_t length;  // In TIMEBASE units.
			bool interlaced;
			unsigned field;  // Which field (0 or 1) of the frame to use. Always 0 for progressive.
			std::function<void()> upload_func;  // Needs to be called to actually upload the texture to OpenGL.
			unsigned dropped_frames = 0;  // Number of dropped frames before this one.
			std::chrono::steady_clock::time_point received_timestamp = std::chrono::steady_clock::time_point::min();
		};
		std::deque<NewFrame> new_frames;
		bool should_quit = false;
		std::condition_variable new_frames_changed;  // Set whenever new_frames (or should_quit) is changed.

		QueueLengthPolicy queue_length_policy;  // Refers to the "new_frames" queue.

		int last_timecode = -1;  // Unwrapped.

		JitterHistory jitter_history;

		// Metrics.
		std::vector<std::pair<std::string, std::string>> labels;
		std::atomic<int64_t> metric_input_received_frames{0};
		std::atomic<int64_t> metric_input_duped_frames{0};
		std::atomic<int64_t> metric_input_dropped_frames_jitter{0};
		std::atomic<int64_t> metric_input_dropped_frames_error{0};
		std::atomic<int64_t> metric_input_resets{0};
		std::atomic<int64_t> metric_input_queue_length_frames{0};

		std::atomic<int64_t> metric_input_has_signal_bool{-1};
		std::atomic<int64_t> metric_input_is_connected_bool{-1};
		std::atomic<int64_t> metric_input_interlaced_bool{-1};
		std::atomic<int64_t> metric_input_width_pixels{-1};
		std::atomic<int64_t> metric_input_height_pixels{-1};
		std::atomic<int64_t> metric_input_frame_rate_nom{-1};
		std::atomic<int64_t> metric_input_frame_rate_den{-1};
		std::atomic<int64_t> metric_input_sample_rate_hz{-1};
	};
	JitterHistory output_jitter_history;
	CaptureCard cards[MAX_VIDEO_CARDS];  // Protected by <card_mutex>.
	YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];  // Protected by <card_mutex>.
	AudioMixer audio_mixer;  // Same as global_audio_mixer (see audio_mixer.h).
	bool input_card_is_master_clock(unsigned card_index, unsigned master_card_index) const;
	struct OutputFrameInfo {
		int dropped_frames;  // Since last frame.
		int num_samples;  // Audio samples needed for this output frame.
		int64_t frame_duration;  // In TIMEBASE units.
		bool is_preroll;
		std::chrono::steady_clock::time_point frame_timestamp;
	};
	OutputFrameInfo get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS]);

	InputState input_state;

	// Cards we have been notified about being hotplugged, but haven't tried adding yet.
	// Protected by its own mutex.
	std::mutex hotplug_mutex;
	std::vector<libusb_device *> hotplugged_cards;

	class OutputChannel {
	public:
		~OutputChannel();
		void output_frame(DisplayFrame frame);
		bool get_display_frame(DisplayFrame *frame);
		void add_frame_ready_callback(void *key, new_frame_ready_callback_t callback);
		void remove_frame_ready_callback(void *key);
		void set_transition_names_updated_callback(transition_names_updated_callback_t callback);
		void set_name_updated_callback(name_updated_callback_t callback);
		void set_color_updated_callback(color_updated_callback_t callback);

	private:
		friend class Mixer;

		unsigned channel;
		Mixer *parent = nullptr;  // Not owned.
		std::mutex frame_mutex;
		DisplayFrame current_frame, ready_frame;  // Protected by <frame_mutex>.
		bool has_current_frame = false, has_ready_frame = false;  // Protected by <frame_mutex>.
		std::map<void *, new_frame_ready_callback_t> new_frame_ready_callbacks;  // Protected by <frame_mutex>.
		transition_names_updated_callback_t transition_names_updated_callback;
		name_updated_callback_t name_updated_callback;
		color_updated_callback_t color_updated_callback;

		std::vector<std::string> last_transition_names;
		std::string last_name, last_color;
	};
	OutputChannel output_channel[NUM_OUTPUTS];

	std::thread mixer_thread;
	std::thread audio_thread;
	std::atomic<bool> should_quit{false};
	std::atomic<bool> should_cut{false};

	std::unique_ptr<ALSAOutput> alsa;

	struct AudioTask {
		int64_t pts_int;
		int num_samples;
		bool adjust_rate;
		std::chrono::steady_clock::time_point frame_timestamp;
	};
	std::mutex audio_mutex;
	std::condition_variable audio_task_queue_changed;
	std::queue<AudioTask> audio_task_queue;  // Under audio_mutex.
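	//
	// A hedged sketch of the producer/consumer pattern these members suggest
	// (the actual code may differ in details):
	//
	//   // Producer (mixer thread), per output frame:
	//   {
	//           std::lock_guard<std::mutex> lock(audio_mutex);
	//           audio_task_queue.push(AudioTask{pts_int, num_samples, adjust_rate, frame_timestamp});
	//   }
	//   audio_task_queue_changed.notify_one();
	//
	//   // Consumer (audio_thread_func()):
	//   std::unique_lock<std::mutex> lock(audio_mutex);
	//   audio_task_queue_changed.wait(lock, [this]{ return should_quit || !audio_task_queue.empty(); });
	//   AudioTask task = audio_task_queue.front();
	//   audio_task_queue.pop();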

	// For mode scanning.
	bool is_mode_scanning[MAX_VIDEO_CARDS]{ false };
	std::vector<uint32_t> mode_scanlist[MAX_VIDEO_CARDS];
	unsigned mode_scanlist_index[MAX_VIDEO_CARDS]{ 0 };
	std::chrono::steady_clock::time_point last_mode_scan_change[MAX_VIDEO_CARDS];

	// Metrics.
	std::atomic<int64_t> metric_frames_output_total{0};
	std::atomic<int64_t> metric_frames_output_dropped{0};
	std::atomic<double> metric_start_time_seconds{0.0 / 0.0};
	std::atomic<int64_t> metrics_memory_used_bytes{0};
	std::atomic<double> metrics_memory_locked_limit_bytes{0.0 / 0.0};
};

extern Mixer *global_mixer;
extern bool uses_mlock;

#endif  // !defined(_MIXER_H)