git.sesse.net Git - nageru/blob - nageru/decklink_output.cpp
WIP patch for async output.
[nageru] / nageru / decklink_output.cpp
1 #include <movit/effect_util.h>
2 #include <movit/util.h>
3 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
4 #include <pthread.h>
5 #include <unistd.h>
6
7 #include <mutex>
8
9 #include <epoxy/egl.h>
10
11 #include "chroma_subsampler.h"
12 #include "decklink_output.h"
13 #include "decklink_util.h"
14 #include "flags.h"
15 #include "shared/metrics.h"
16 #include "print_latency.h"
17 #include "shared/timebase.h"
18 #include "v210_converter.h"
19
20 using namespace movit;
21 using namespace std;
22 using namespace std::chrono;
23
24 namespace {
25
26 // This class can be deleted during regular use, so make all the metrics static.
27 once_flag decklink_metrics_inited;
28 LatencyHistogram latency_histogram;
29 atomic<int64_t> metric_decklink_output_width_pixels{-1};
30 atomic<int64_t> metric_decklink_output_height_pixels{-1};
31 atomic<int64_t> metric_decklink_output_frame_rate_den{-1};
32 atomic<int64_t> metric_decklink_output_frame_rate_nom{-1};
33 atomic<int64_t> metric_decklink_output_inflight_frames{0};
34 atomic<int64_t> metric_decklink_output_color_mismatch_frames{0};
35
36 atomic<int64_t> metric_decklink_output_scheduled_frames_dropped{0};
37 atomic<int64_t> metric_decklink_output_scheduled_frames_late{0};
38 atomic<int64_t> metric_decklink_output_scheduled_frames_normal{0};
39 atomic<int64_t> metric_decklink_output_scheduled_frames_preroll{0};
40
41 atomic<int64_t> metric_decklink_output_completed_frames_completed{0};
42 atomic<int64_t> metric_decklink_output_completed_frames_dropped{0};
43 atomic<int64_t> metric_decklink_output_completed_frames_flushed{0};
44 atomic<int64_t> metric_decklink_output_completed_frames_late{0};
45 atomic<int64_t> metric_decklink_output_completed_frames_unknown{0};
46
47 atomic<int64_t> metric_decklink_output_scheduled_samples{0};
48
49 Summary metric_decklink_output_margin_seconds;
50
51 }  // namespace
52
53 DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index)
54         : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
55 {
56         chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
57
58         call_once(decklink_metrics_inited, [](){
59                 latency_histogram.init("decklink_output");
60                 global_metrics.add("decklink_output_width_pixels", &metric_decklink_output_width_pixels, Metrics::TYPE_GAUGE);
61                 global_metrics.add("decklink_output_height_pixels", &metric_decklink_output_height_pixels, Metrics::TYPE_GAUGE);
62                 global_metrics.add("decklink_output_frame_rate_den", &metric_decklink_output_frame_rate_den, Metrics::TYPE_GAUGE);
63                 global_metrics.add("decklink_output_frame_rate_nom", &metric_decklink_output_frame_rate_nom, Metrics::TYPE_GAUGE);
64                 global_metrics.add("decklink_output_inflight_frames", &metric_decklink_output_inflight_frames, Metrics::TYPE_GAUGE);
65                 global_metrics.add("decklink_output_color_mismatch_frames", &metric_decklink_output_color_mismatch_frames);
66
67                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "dropped" }}, &metric_decklink_output_scheduled_frames_dropped);
68                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "late" }}, &metric_decklink_output_scheduled_frames_late);
69                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "normal" }}, &metric_decklink_output_scheduled_frames_normal);
70                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "preroll" }}, &metric_decklink_output_scheduled_frames_preroll);
71
72                 global_metrics.add("decklink_output_completed_frames", {{ "status", "completed" }}, &metric_decklink_output_completed_frames_completed);
73                 global_metrics.add("decklink_output_completed_frames", {{ "status", "dropped" }}, &metric_decklink_output_completed_frames_dropped);
74                 global_metrics.add("decklink_output_completed_frames", {{ "status", "flushed" }}, &metric_decklink_output_completed_frames_flushed);
75                 global_metrics.add("decklink_output_completed_frames", {{ "status", "late" }}, &metric_decklink_output_completed_frames_late);
76                 global_metrics.add("decklink_output_completed_frames", {{ "status", "unknown" }}, &metric_decklink_output_completed_frames_unknown);
77
78                 global_metrics.add("decklink_output_scheduled_samples", &metric_decklink_output_scheduled_samples);
79                 vector<double> quantiles{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99};
80                 metric_decklink_output_margin_seconds.init(quantiles, 60.0);
81                 global_metrics.add("decklink_output_margin_seconds", &metric_decklink_output_margin_seconds);
82         });
83 }
84
85 bool DeckLinkOutput::set_device(IDeckLink *decklink)
86 {
87         if (decklink->QueryInterface(IID_IDeckLinkInput, (void**)&input) != S_OK) {
88                 input = nullptr;
89         }
90         if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) {
91                 fprintf(stderr, "Warning: Card %u has no outputs\n", card_index);
92                 return false;
93         }
94
95         IDeckLinkDisplayModeIterator *mode_it;
96         if (output->GetDisplayModeIterator(&mode_it) != S_OK) {
97                 fprintf(stderr, "Warning: Failed to enumerate output display modes for card %u\n", card_index);
98                 return false;
99         }
100
101         video_modes.clear();
102
103         for (const auto &it : summarize_video_modes(mode_it, card_index)) {
104                 if (it.second.width != width || it.second.height != height) {
105                         continue;
106                 }
107
108                 // We could support interlaced modes, but let's stay out of it for now,
109                 // since we don't have interlaced stream output.
110                 if (it.second.interlaced) {
111                         continue;
112                 }
113
114                 video_modes.insert(it);
115         }
116
117         mode_it->Release();
118
119         // HDMI or SDI generally mean “both HDMI and SDI at the same time” on DeckLink cards
120         // that support both; pick_default_video_connection() will generally pick one of those
121         // if they exist. (--prefer-hdmi-input would also affect the selection despite the name
122         // of the option, but since either generally means both, it's inconsequential.)
123         // We're not very likely to need analog outputs, so we don't need a way to change
124         // beyond that.
125         video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index);
126         return true;
127 }
128
129 void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts, bool is_master_card)
130 {
131         assert(output);
132         assert(!playback_initiated);
133
134         if (video_modes.empty()) {
135                 fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height);
136                 abort();
137         }
138
139         should_quit.unquit();
140         playback_initiated = true;
141         playback_started = false;
142         if (is_master_card) {
143                 this->base_pts = base_pts;
144         } else {
145                 this->next_output_pts = 0;
146         }
147
148         IDeckLinkConfiguration *config = nullptr;
149         if (output->QueryInterface(IID_IDeckLinkConfiguration, (void**)&config) != S_OK) {
150                 fprintf(stderr, "Failed to get configuration interface for output card\n");
151                 abort();
152         }
153         if (config->SetFlag(bmdDeckLinkConfigLowLatencyVideoOutput, true) != S_OK) {
154                 fprintf(stderr, "Failed to set low latency output\n");
155                 abort();
156         }
157         if (config->SetInt(bmdDeckLinkConfigVideoOutputConnection, video_connection) != S_OK) {
158                 fprintf(stderr, "Failed to set video output connection for card %u\n", card_index);
159                 abort();
160         }
161         if (config->SetFlag(bmdDeckLinkConfigOutput1080pAsPsF, true) != S_OK) {
162                 fprintf(stderr, "Failed to set PsF flag for card\n");
163                 abort();
164         }
165         if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) {
166                 // This affects at least some no-name SDI->HDMI converters.
167                 // Warn, but don't die.
168                 fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n");
169         }
170
171         BMDDisplayModeSupport support;
172         IDeckLinkDisplayMode *display_mode;
173         BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV;
174         if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
175                                          &support, &display_mode) != S_OK) {
176                 fprintf(stderr, "Couldn't ask for format support\n");
177                 abort();
178         }
179
180         if (support == bmdDisplayModeNotSupported) {
181                 fprintf(stderr, "Requested display mode not supported\n");
182                 abort();
183         }
184
185         current_mode_flags = display_mode->GetFlags();
186
187         BMDTimeValue time_value;
188         BMDTimeScale time_scale;
189         if (display_mode->GetFrameRate(&time_value, &time_scale) != S_OK) {
190                 fprintf(stderr, "Couldn't get frame rate\n");
191                 abort();
192         }
193
194         metric_decklink_output_width_pixels = width;
195         metric_decklink_output_height_pixels = height;
196         metric_decklink_output_frame_rate_nom = time_value;
197         metric_decklink_output_frame_rate_den = time_scale;
198
199         frame_duration = time_value * TIMEBASE / time_scale;
200
201         display_mode->Release();
202
203         if (input != nullptr) {
204                 if (input->DisableVideoInput() != S_OK) {
205                         fprintf(stderr, "Warning: Failed to disable video input for card %d\n", card_index);
206                 }
207                 if (input->DisableAudioInput() != S_OK) {
208                         fprintf(stderr, "Warning: Failed to disable audio input for card %d\n", card_index);
209                 }
210         }
211
212         HRESULT result = output->EnableVideoOutput(mode, bmdVideoOutputFlagDefault);
213         if (result != S_OK) {
214                 fprintf(stderr, "Couldn't enable output with error 0x%x\n", result);
215                 abort();
216         }
217         if (output->SetScheduledFrameCompletionCallback(this) != S_OK) {
218                 fprintf(stderr, "Couldn't set callback\n");
219                 abort();
220         }
221         assert(OUTPUT_FREQUENCY == 48000);
222         if (output->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, 2, bmdAudioOutputStreamTimestamped) != S_OK) {
223                 fprintf(stderr, "Couldn't enable audio output\n");
224                 abort();
225         }
226         if (is_master_card) {
227                 if (output->BeginAudioPreroll() != S_OK) {
228                         fprintf(stderr, "Couldn't begin audio preroll\n");
229                         abort();
230                 }
231         } else {
232                 if (output->StartScheduledPlayback(/*base_pts=*/0, TIMEBASE, 1.0) != S_OK) {
233                         fprintf(stderr, "Could not start playback\n");
234                         abort();  // TODO
235                 }
236                 playback_started = true;
237         }
238
239         present_thread = thread([this]{
240                 QOpenGLContext *context = create_context(this->surface);
241                 eglBindAPI(EGL_OPENGL_API);
242                 if (!make_current(context, this->surface)) {
243                         printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(),
244                                 eglGetError());
245                         abort();
246                 }
247                 present_thread_func();
248                 delete_context(context);
249         });
250 }
251
252 void DeckLinkOutput::end_output()
253 {
254         if (!playback_initiated) {
255                 return;
256         }
257
258         should_quit.quit();
259         frame_queues_changed.notify_all();
260         present_thread.join();
261         playback_initiated = false;
262
263         output->StopScheduledPlayback(0, nullptr, 0);
264         output->DisableVideoOutput();
265         output->DisableAudioOutput();
266
267         // Wait until all frames are accounted for, and free them.
268         {
269                 unique_lock<mutex> lock(frame_queue_mutex);
270                 while (!(frame_freelist.empty() && scheduled_frames.empty())) {
271                         frame_queues_changed.wait(lock, [this]{ return !frame_freelist.empty(); });
272                         frame_freelist.pop();
273                 }
274         }
275
276         if (input != nullptr) {
277                 input->Release();
278                 input = nullptr;
279         }
280         if (output != nullptr) {
281                 output->Release();
282                 output = nullptr;
283         }
284 }
285
286 void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
287 {
288         assert(!should_quit.should_quit());
289
290         input_jitter_history.frame_arrived(steady_clock::now(), duration, /*dropped_frames=*/0, true);
291
292         if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) {
293                 if (!last_frame_had_mode_mismatch) {
294                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
295                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
296                 }
297                 last_frame_had_mode_mismatch = true;
298                 ++metric_decklink_output_color_mismatch_frames;
299         } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) {
300                 if (!last_frame_had_mode_mismatch) {
301                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
302                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
303                 }
304                 last_frame_had_mode_mismatch = true;
305                 ++metric_decklink_output_color_mismatch_frames;
306         } else {
307                 last_frame_had_mode_mismatch = false;
308         }
309
310         unique_ptr<Frame> frame = get_frame();
311         if (global_flags.ten_bit_output) {
312                 chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
313         } else {
314                 chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
315         }
316
317         // Download the UYVY texture to the PBO.
318         glPixelStorei(GL_PACK_ROW_LENGTH, 0);
319         check_error();
320
321         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
322         check_error();
323
324         if (global_flags.ten_bit_output) {
325                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
326                 check_error();
327                 glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
328                 check_error();
329         } else {
330                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
331                 check_error();
332                 glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
333                 check_error();
334         }
335
336         glBindTexture(GL_TEXTURE_2D, 0);
337         check_error();
338         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
339         check_error();
340
341         glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
342         check_error();
343
344         frame->fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
345         check_error();
346         glFlush();  // Make the DeckLink thread see the fence as soon as possible.
347         check_error();
348
349         frame->input_frames = input_frames;
350         frame->received_ts = find_received_timestamp(input_frames);
351         frame->pts = pts;
352         frame->duration = duration;
353
354         {
355                 lock_guard<mutex> lock(frame_queue_mutex);
356                 pending_video_frames.push(move(frame));
357         }
358         frame_queues_changed.notify_all();
359 }
360
361 void DeckLinkOutput::send_audio(int64_t pts, const std::vector<float> &samples)
362 {
363         return;  // FIXME we may need to map pts or something?
364
365         unique_ptr<int32_t[]> int_samples(new int32_t[samples.size()]);
366         for (size_t i = 0; i < samples.size(); ++i) {
367                 int_samples[i] = lrintf(samples[i] * 2147483648.0f);
368         }
369
370         uint32_t frames_written;
371         HRESULT result = output->ScheduleAudioSamples(int_samples.get(), samples.size() / 2,
372                 pts, TIMEBASE, &frames_written);
373         if (result != S_OK) {
374                 fprintf(stderr, "ScheduleAudioSamples(pts=%" PRId64 ") failed (result=0x%08x)\n", pts, result);
375         } else {
376                 if (frames_written != samples.size() / 2) {
377                         fprintf(stderr, "ScheduleAudioSamples() returned short write (%u/%zu)\n", frames_written, samples.size() / 2);
378                 }
379         }
380         metric_decklink_output_scheduled_samples += samples.size() / 2;
381 }
382
383 void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp)
384 {
385         assert(!should_quit.should_quit());
386
387         *dropped_frames = 0;
388         *frame_duration = this->frame_duration;
389
390         const BMDTimeValue buffer = lrint(*frame_duration * global_flags.output_buffer_frames);
391         const BMDTimeValue max_overshoot = lrint(*frame_duration * global_flags.output_slop_frames);
392         BMDTimeValue target_time = pts - buffer;
393
394         // While prerolling, we send out frames as quickly as we can.
395         if (target_time < base_pts) {
396                 *is_preroll = true;
397                 ++metric_decklink_output_scheduled_frames_preroll;
398                 return;
399         }
400
401         *is_preroll = !playback_started;
402
403         if (!playback_started) {
404                 if (output->EndAudioPreroll() != S_OK) {
405                         fprintf(stderr, "Could not end audio preroll\n");
406                         abort();  // TODO
407                 }
408                 if (output->StartScheduledPlayback(base_pts, TIMEBASE, 1.0) != S_OK) {
409                         fprintf(stderr, "Could not start playback\n");
410                         abort();  // TODO
411                 }
412                 playback_started = true;
413         }
414
415         BMDTimeValue stream_frame_time;
416         double playback_speed;
417         output->GetScheduledStreamTime(TIMEBASE, &stream_frame_time, &playback_speed);
418
419         *frame_timestamp = steady_clock::now() +
420                 nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE);
421
422         metric_decklink_output_margin_seconds.count_event(
423                 (target_time - stream_frame_time) / double(TIMEBASE));
424
425         // If we're ahead of time, wait for the frame to (approximately) start.
426         if (stream_frame_time < target_time) {
427                 should_quit.sleep_until(*frame_timestamp);
428                 ++metric_decklink_output_scheduled_frames_normal;
429                 return;
430         }
431
432         // If we overshot the previous frame by just a little,
433         // fire off one immediately.
434         if (stream_frame_time < target_time + max_overshoot) {
435                 fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n",
436                         lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE));
437                 ++metric_decklink_output_scheduled_frames_late;
438                 return;
439         }
440
441         // Oops, we missed by more than one frame. Return immediately,
442         // but drop so that we catch up.
443         *dropped_frames = (stream_frame_time - target_time + *frame_duration - 1) / *frame_duration;
444         const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE;
445         *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame);
446         fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames);
447         metric_decklink_output_scheduled_frames_dropped += *dropped_frames;
448         ++metric_decklink_output_scheduled_frames_normal;
449 }
450
451 uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
452 {
453         if (video_modes.count(mode)) {
454                 return mode;
455         }
456
457         // Prioritize 59.94 > 60 > 29.97. If none of those are found, then pick the highest one.
458         for (const pair<int, int> &desired : vector<pair<int, int>>{ { 50, 1 }, { 60000, 1001 }, { 60, 1 }, { 30000, 1001 } }) {
459                 for (const auto &it : video_modes) {
460                         if (it.second.frame_rate_num * desired.second == desired.first * it.second.frame_rate_den) {
461                                 return it.first;
462                         }
463                 }
464         }
465
466         uint32_t best_mode = 0;
467         double best_fps = 0.0;
468         for (const auto &it : video_modes) {
469                 double fps = double(it.second.frame_rate_num) / it.second.frame_rate_den;
470                 if (fps > best_fps) {
471                         best_mode = it.first;
472                         best_fps = fps;
473                 }
474         }
475         return best_mode;
476 }
477
478 YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const
479 {
480         if (current_mode_flags & bmdDisplayModeColorspaceRec601) {
481                 return YCBCR_REC_601;
482         } else {
483                 // Don't bother checking bmdDisplayModeColorspaceRec709;
484                 // if none is set, 709 is a good default anyway.
485                 return YCBCR_REC_709;
486         }
487 }
488
489 HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result)
490 {
491         Frame *frame = static_cast<Frame *>(completedFrame);
492
493         BMDTimeValue stream_frame_time, played_at_time;
494         BMDTimeValue hardwareTime, timeInFrame, ticksPerFrame;
495         double playback_speed;
496         output->GetFrameCompletionReferenceTimestamp(frame, TIMEBASE, &played_at_time);
497         output->GetScheduledStreamTime(TIMEBASE, &stream_frame_time, &playback_speed);
498         output->GetHardwareReferenceClock(TIMEBASE, &hardwareTime, &timeInFrame, &ticksPerFrame);
499
500         steady_clock::time_point now = steady_clock::now();
501         int frame_delay = (stream_frame_time - frame->pts) / frame_duration - 1;
502         map<int, string> status = {
503                 { bmdOutputFrameCompleted, "played" },
504                 { bmdOutputFrameDisplayedLate, "DELAYED" },
505                 { bmdOutputFrameDropped, "DROPPED" },
506                 { bmdOutputFrameFlushed, "FLUSHED" }
507         };
508
509         if ((result == bmdOutputFrameCompleted || result == bmdOutputFrameDisplayedLate) && false) {
510                 fprintf(stderr, "now=%ld / %.2f: frame with pts=%ld (%ld ago, %d delay) / %.2f was %s at time %ld (%ld ago)\n",
511                         stream_frame_time, PTSToTime(stream_frame_time),
512                         frame->pts, stream_frame_time - frame->pts, frame_delay, PTSToTime(frame->pts),
513                         status[result].c_str(),
514                         played_at_time, hardwareTime - played_at_time);
515         } else if (result == bmdOutputFrameDisplayedLate) {
516                 fprintf(stderr, "now=%ld / %.2f: frame with pts=%ld (%ld ago, %d delay) / %.2f was %s to %.2f\n",
517                         stream_frame_time, PTSToTime(stream_frame_time),
518                         frame->pts, stream_frame_time - frame->pts, frame_delay, PTSToTime(frame->pts),
519                         status[result].c_str(), PTSToTime(frame->pts) + frame_delay);
520         } else {
521                 fprintf(stderr, "now=%ld / %.2f: frame with pts=%ld (%ld ago, %d delay) / %.2f was %s\n",
522                         stream_frame_time, PTSToTime(stream_frame_time),
523                         frame->pts, stream_frame_time - frame->pts, frame_delay, PTSToTime(frame->pts),
524                         status[result].c_str());
525         }
526         if (frame_delay < 0) {
527                 fprintf(stderr, "ERROR: Frame went backwards in time (scheduled to start at pts=%ld, ended at or before pts=%ld), something is strange.\n",
528                         frame->pts, stream_frame_time);
529                 frame_delay = 0;
530         }
531         
532         switch (result) {
533         case bmdOutputFrameCompleted:
534                 ++metric_decklink_output_completed_frames_completed;
535                 if (frame_delay != 0) {
536                         fprintf(stderr, "ERROR: Frame was reportedly completed without delay, but was delayed nevertheless.\n");
537                         // Our callback _might_ be delayed 1+ frame for other reasons,
538                         // so ignore this. It's a pity GetFrameCompletionReferenceTimestamp()
539                         // cannot give us a timestamp on the same time scale as
540                         // GetScheduledStreamTime(); it would be more robust.
541                         frame_delay = 0;
542                 }
543                 break;
544         case bmdOutputFrameDisplayedLate:
545         //      fprintf(stderr, "Output frame displayed late (pts=%" PRId64 ")\n", frame->pts);
546                 //fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
547                 ++metric_decklink_output_completed_frames_late;
548                 break;
549         case bmdOutputFrameDropped:
550         //      fprintf(stderr, "Output frame was dropped (pts=%" PRId64 ")\n", frame->pts);
551                 //fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
552                 ++metric_decklink_output_completed_frames_dropped;
553                 break;
554         case bmdOutputFrameFlushed:
555         //      fprintf(stderr, "Output frame was flushed (pts=%" PRId64 ")\n", frame->pts);
556                 ++metric_decklink_output_completed_frames_flushed;
557                 break;
558         default:
559                 fprintf(stderr, "Output frame completed with unknown status %d\n", result);
560                 ++metric_decklink_output_completed_frames_unknown;
561                 break;
562         }
563
564         static int frameno = 0;
565         print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram);
566
567         {
568                 lock_guard<mutex> lock(frame_queue_mutex);
569                 frame_freelist.push(unique_ptr<Frame>(frame));
570
571                 // Dropped frames can come out-of-order, so we can't just look at the front;
572                 // we need to go and find it in the list.
573                 auto it = find(scheduled_frames.begin(), scheduled_frames.end(), frame);
574                 assert(it != scheduled_frames.end());
575                 scheduled_frames.erase(it);
576                 --metric_decklink_output_inflight_frames;
577
578                 if (frame_delay > 0 && result == bmdOutputFrameDisplayedLate) {
579                         // All frames that were queued earlier will be delayed,
580                         // so update so that we don't double-count the delay.
581                         int64_t prev_frame_pts = frame->pts + frame_delay * frame_duration;
582                         for (Frame *other_frame : scheduled_frames) {
583                                 int64_t old_pts = other_frame->pts;
584                                 other_frame->pts = std::max(other_frame->pts, prev_frame_pts + frame_duration);
585                                 fprintf(stderr, " - moving frame from pts=%ld (%.3f) to pts=%ld (%.3f)\n",
586                                         old_pts, PTSToTime(old_pts), other_frame->pts, PTSToTime(other_frame->pts));
587                                 prev_frame_pts = other_frame->pts;
588                         }
589
590                         int64_t earliest_next_frame = (stream_frame_time + frame_duration - 1) / frame_duration * frame_duration;
591                         earliest_next_frame = std::max(earliest_next_frame, prev_frame_pts + frame_duration);
592                         if (next_output_pts < earliest_next_frame) {
593                                 // In effect, duplicate a frame. FIXME write something about this
594                                 // FIXME is this really right now? but perhaps we're forced
595                                 // and it messes up the queue length calculation temporarily, we need something else there
596                                 //fprintf(stderr, "Duplicating frame %d times due to starvation!\n", frame_delay);
597
598                                 // FIXME this causes us to believe in output jitter?
599                                 fprintf(stderr, " - moving output pointer from pts=%ld (%.3f) to pts=%ld (%.3f)\n",
600                                         next_output_pts, PTSToTime(next_output_pts), earliest_next_frame, PTSToTime(earliest_next_frame));
601                                 next_output_pts = earliest_next_frame;
602                                 // FIXME metric
603                         }
604                 }
605
606                 if (result == bmdOutputFrameCompleted || result == bmdOutputFrameDisplayedLate) {
607                         //output_jitter_history.frame_arrived(now, frame_duration, /*dropped_frames=*/frame_delay, true);
608                         // TODO: backdate now, and possibly also get_expected_next_frame()?
609                         queue_length_policy.update_policy(
610                                 now,
611                                 input_jitter_history.get_expected_next_frame(),
612                                 frame->duration, frame_duration,
613                                 input_jitter_history.estimate_max_jitter(),
614                         //      output_jitter_history.estimate_max_jitter(), true);
615                                 0.0, true);
616                         num_safe_frames = queue_length_policy.get_safe_queue_length();
617                 }
618                 fprintf(stderr, "%zu frames in flight, safe queue length = %u (starv=%d)\n", scheduled_frames.size(), queue_length_policy.get_safe_queue_length(), frame_delay);
619         }
620
621         return S_OK;
622 }
623
624 HRESULT DeckLinkOutput::ScheduledPlaybackHasStopped()
625 {
626         printf("playback stopped!\n");
627         return S_OK;
628 }
629
630 unique_ptr<DeckLinkOutput::Frame> DeckLinkOutput::get_frame()
631 {
632         lock_guard<mutex> lock(frame_queue_mutex);
633
634         if (!frame_freelist.empty()) {
635                 unique_ptr<Frame> frame = move(frame_freelist.front());
636                 frame_freelist.pop();
637                 return frame;
638         }
639
640         unique_ptr<Frame> frame(new Frame);
641
642         size_t stride;
643         if (global_flags.ten_bit_output) {
644                 stride = v210Converter::get_v210_stride(width);
645                 GLint v210_width = stride / sizeof(uint32_t);
646                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
647
648                 // We need valid texture state, or NVIDIA won't allow us to write to the texture.
649                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
650                 check_error();
651                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
652                 check_error();
653         } else {
654                 stride = width * 2;
655                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
656         }
657
658         glGenBuffers(1, &frame->pbo);
659         check_error();
660         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
661         check_error();
662         glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
663         check_error();
664         frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
665         check_error();
666         frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
667         frame->resource_pool = resource_pool;
668
669         return frame;
670 }
671
672 void DeckLinkOutput::present_thread_func()
673 {
674         pthread_setname_np(pthread_self(), "DeckLinkOutput");
675         for ( ;; ) {
676                 unique_ptr<Frame> frame;
677                 {
678                         unique_lock<mutex> lock(frame_queue_mutex);
679                         frame_queues_changed.wait(lock, [this]{
680                                 return should_quit.should_quit() || !pending_video_frames.empty();
681                         });
682                         if (should_quit.should_quit()) {
683                                 return;
684                         }
685                         frame = move(pending_video_frames.front());
686                         pending_video_frames.pop();
687
688                         if (scheduled_frames.size() > num_safe_frames) {  // FIXME check off-by-one here
689                                 fprintf(stderr, "Dropping frame to keep latency down!\n");
690                                 // FIXME metric
691                                 continue;
692                         }
693
694                         // Overwrite the pts given by the client; it doesn't own our clock.
695                         // TODO: Write something about what semi-unsynchronized really means.
696                         frame->pts = next_output_pts;
697                         next_output_pts += frame_duration;
698                 }
699
700                 for ( ;; ) {
701                         int err = glClientWaitSync(frame->fence.get(), /*flags=*/0, 0);
702                         if (err == GL_TIMEOUT_EXPIRED) {
703                                 // NVIDIA likes to busy-wait; yield instead.
704                                 this_thread::sleep_for(milliseconds(1));
705                         } else {
706                                 break;
707                         }
708                 }
709                 check_error();
710                 frame->fence.reset();
711
712                 if (global_flags.ten_bit_output) {
713                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
714                 } else {
715                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
716                 }
717
718                 // Release any input frames we needed to render this frame.
719                 frame->input_frames.clear();
720
721                 BMDTimeValue pts = frame->pts;
722                 BMDTimeValue duration = frame->duration;
723                 HRESULT res = output->ScheduleVideoFrame(frame.get(), pts, duration, TIMEBASE);
724                 lock_guard<mutex> lock(frame_queue_mutex);
725                 if (res == S_OK) {
726                         scheduled_frames.push_back(frame.release());  // Owned by the driver now.
727                         ++metric_decklink_output_inflight_frames;
728                 } else {
729                         fprintf(stderr, "Could not schedule video frame! (error=0x%08x)\n", res);
730
731                         frame_freelist.push(move(frame));
732                 }
733         }
734 }
735
736 double DeckLinkOutput::PTSToTime(int64_t pts)
737 {
738         return double(pts) / frame_duration;
739 }
740
741 HRESULT STDMETHODCALLTYPE DeckLinkOutput::QueryInterface(REFIID, LPVOID *)
742 {
743         return E_NOINTERFACE;
744 }
745
746 ULONG STDMETHODCALLTYPE DeckLinkOutput::AddRef()
747 {
748         return refcount.fetch_add(1) + 1;
749 }
750
751 ULONG STDMETHODCALLTYPE DeckLinkOutput::Release()
752 {
753         int new_ref = refcount.fetch_sub(1) - 1;
754         if (new_ref == 0)
755                 delete this;
756         return new_ref;
757 }
758
759 DeckLinkOutput::Frame::~Frame()
760 {
761         glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
762         check_error();
763         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
764         check_error();
765         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
766         check_error();
767         glDeleteBuffers(1, &pbo);
768         check_error();
769         resource_pool->release_2d_texture(uyvy_tex);
770         check_error();
771 }
772
773 HRESULT STDMETHODCALLTYPE DeckLinkOutput::Frame::QueryInterface(REFIID, LPVOID *)
774 {
775         return E_NOINTERFACE;
776 }
777
778 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::AddRef()
779 {
780         return refcount.fetch_add(1) + 1;
781 }
782
783 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::Release()
784 {
785         int new_ref = refcount.fetch_sub(1) - 1;
786         if (new_ref == 0)
787                 delete this;
788         return new_ref;
789 }
790
791 long DeckLinkOutput::Frame::GetWidth()
792 {
793         return global_flags.width;
794 }
795
796 long DeckLinkOutput::Frame::GetHeight()
797 {
798         return global_flags.height;
799 }
800
801 long DeckLinkOutput::Frame::GetRowBytes()
802 {
803         if (global_flags.ten_bit_output) {
804                 return v210Converter::get_v210_stride(global_flags.width);
805         } else {
806                 return global_flags.width * 2;
807         }
808 }
809
810 BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
811 {
812         if (global_flags.ten_bit_output) {
813                 return bmdFormat10BitYUV;
814         } else {
815                 return bmdFormat8BitYUV;
816         }
817 }
818
819 BMDFrameFlags DeckLinkOutput::Frame::GetFlags()
820 {
821         return bmdFrameFlagDefault;
822 }
823
824 HRESULT DeckLinkOutput::Frame::GetBytes(/* out */ void **buffer)
825 {
826         *buffer = uyvy_ptr_local.get();
827         return S_OK;
828 }
829
830 HRESULT DeckLinkOutput::Frame::GetTimecode(/* in */ BMDTimecodeFormat format, /* out */ IDeckLinkTimecode **timecode)
831 {
832         fprintf(stderr, "STUB: GetTimecode()\n");
833         return E_NOTIMPL;
834 }
835
836 HRESULT DeckLinkOutput::Frame::GetAncillaryData(/* out */ IDeckLinkVideoFrameAncillary **ancillary)
837 {
838         fprintf(stderr, "STUB: GetAncillaryData()\n");
839         return E_NOTIMPL;
840 }