// Source: git.sesse.net Git - nageru/blob - nageru/decklink_output.cpp
// Commit subject: "Collapse all the 10-bit flags."
// Path: [nageru] / nageru / decklink_output.cpp
1 #include <movit/effect_util.h>
2 #include <movit/util.h>
3 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
4 #include <pthread.h>
5 #include <unistd.h>
6
7 #include <mutex>
8
9 #include <epoxy/egl.h>
10
11 #include "chroma_subsampler.h"
12 #include "decklink_output.h"
13 #include "decklink_util.h"
14 #include "flags.h"
15 #include "shared/metrics.h"
16 #include "print_latency.h"
17 #include "shared/timebase.h"
18 #include "v210_converter.h"
19
20 using namespace movit;
21 using namespace std;
22 using namespace std::chrono;
23
24 namespace {
25
26 // This class can be deleted during regular use, so make all the metrics static.
27 once_flag decklink_metrics_inited;
28 LatencyHistogram latency_histogram;
29 atomic<int64_t> metric_decklink_output_width_pixels{-1};
30 atomic<int64_t> metric_decklink_output_height_pixels{-1};
31 atomic<int64_t> metric_decklink_output_frame_rate_den{-1};
32 atomic<int64_t> metric_decklink_output_frame_rate_nom{-1};
33 atomic<int64_t> metric_decklink_output_inflight_frames{0};
34 atomic<int64_t> metric_decklink_output_color_mismatch_frames{0};
35
36 atomic<int64_t> metric_decklink_output_scheduled_frames_dropped{0};
37 atomic<int64_t> metric_decklink_output_scheduled_frames_late{0};
38 atomic<int64_t> metric_decklink_output_scheduled_frames_normal{0};
39 atomic<int64_t> metric_decklink_output_scheduled_frames_preroll{0};
40
41 atomic<int64_t> metric_decklink_output_completed_frames_completed{0};
42 atomic<int64_t> metric_decklink_output_completed_frames_dropped{0};
43 atomic<int64_t> metric_decklink_output_completed_frames_flushed{0};
44 atomic<int64_t> metric_decklink_output_completed_frames_late{0};
45 atomic<int64_t> metric_decklink_output_completed_frames_unknown{0};
46
47 atomic<int64_t> metric_decklink_output_scheduled_samples{0};
48
49 Summary metric_decklink_output_margin_seconds;
50
51 }  // namespace
52
53 DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index)
54         : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
55 {
56         chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
57
58         call_once(decklink_metrics_inited, [](){
59                 latency_histogram.init("decklink_output");
60                 global_metrics.add("decklink_output_width_pixels", &metric_decklink_output_width_pixels, Metrics::TYPE_GAUGE);
61                 global_metrics.add("decklink_output_height_pixels", &metric_decklink_output_height_pixels, Metrics::TYPE_GAUGE);
62                 global_metrics.add("decklink_output_frame_rate_den", &metric_decklink_output_frame_rate_den, Metrics::TYPE_GAUGE);
63                 global_metrics.add("decklink_output_frame_rate_nom", &metric_decklink_output_frame_rate_nom, Metrics::TYPE_GAUGE);
64                 global_metrics.add("decklink_output_inflight_frames", &metric_decklink_output_inflight_frames, Metrics::TYPE_GAUGE);
65                 global_metrics.add("decklink_output_color_mismatch_frames", &metric_decklink_output_color_mismatch_frames);
66
67                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "dropped" }}, &metric_decklink_output_scheduled_frames_dropped);
68                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "late" }}, &metric_decklink_output_scheduled_frames_late);
69                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "normal" }}, &metric_decklink_output_scheduled_frames_normal);
70                 global_metrics.add("decklink_output_scheduled_frames", {{ "status", "preroll" }}, &metric_decklink_output_scheduled_frames_preroll);
71
72                 global_metrics.add("decklink_output_completed_frames", {{ "status", "completed" }}, &metric_decklink_output_completed_frames_completed);
73                 global_metrics.add("decklink_output_completed_frames", {{ "status", "dropped" }}, &metric_decklink_output_completed_frames_dropped);
74                 global_metrics.add("decklink_output_completed_frames", {{ "status", "flushed" }}, &metric_decklink_output_completed_frames_flushed);
75                 global_metrics.add("decklink_output_completed_frames", {{ "status", "late" }}, &metric_decklink_output_completed_frames_late);
76                 global_metrics.add("decklink_output_completed_frames", {{ "status", "unknown" }}, &metric_decklink_output_completed_frames_unknown);
77
78                 global_metrics.add("decklink_output_scheduled_samples", &metric_decklink_output_scheduled_samples);
79                 vector<double> quantiles{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99};
80                 metric_decklink_output_margin_seconds.init(quantiles, 60.0);
81                 global_metrics.add("decklink_output_margin_seconds", &metric_decklink_output_margin_seconds);
82         });
83 }
84
85 DeckLinkOutput::~DeckLinkOutput()
86 {
87         if (output != nullptr) {
88                 output->Release();
89         }
90 }
91
92 bool DeckLinkOutput::set_device(IDeckLink *decklink, IDeckLinkInput *input_arg)
93 {
94         input = input_arg;
95         if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) {
96                 fprintf(stderr, "Warning: Card %u has no outputs\n", card_index);
97                 return false;
98         }
99
100         IDeckLinkDisplayModeIterator *mode_it;
101         if (output->GetDisplayModeIterator(&mode_it) != S_OK) {
102                 fprintf(stderr, "Warning: Failed to enumerate output display modes for card %u\n", card_index);
103                 return false;
104         }
105
106         video_modes.clear();
107
108         for (const auto &it : summarize_video_modes(mode_it, card_index)) {
109                 if (it.second.width != width || it.second.height != height) {
110                         continue;
111                 }
112
113                 // We could support interlaced modes, but let's stay out of it for now,
114                 // since we don't have interlaced stream output.
115                 if (it.second.interlaced) {
116                         continue;
117                 }
118
119                 video_modes.insert(it);
120         }
121
122         mode_it->Release();
123
124         // HDMI or SDI generally mean “both HDMI and SDI at the same time” on DeckLink cards
125         // that support both; pick_default_video_connection() will generally pick one of those
126         // if they exist. (--prefer-hdmi-input would also affect the selection despite the name
127         // of the option, but since either generally means both, it's inconsequential.)
128         // We're not very likely to need analog outputs, so we don't need a way to change
129         // beyond that.
130         video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index);
131         return true;
132 }
133
134 void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts, bool is_master_card_arg)
135 {
136         assert(output);
137         assert(!playback_initiated);
138         this->is_master_card = is_master_card_arg;
139
140         if (video_modes.empty()) {
141                 fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height);
142                 abort();
143         }
144
145         should_quit.unquit();
146         playback_initiated = true;
147         playback_started = false;
148         this->base_pts = base_pts;
149
150         IDeckLinkConfiguration *config = nullptr;
151         if (output->QueryInterface(IID_IDeckLinkConfiguration, (void**)&config) != S_OK) {
152                 fprintf(stderr, "Failed to get configuration interface for output card\n");
153                 abort();
154         }
155         if (config->SetFlag(bmdDeckLinkConfigLowLatencyVideoOutput, true) != S_OK) {
156                 fprintf(stderr, "Failed to set low latency output\n");
157                 abort();
158         }
159         if (config->SetInt(bmdDeckLinkConfigVideoOutputConnection, video_connection) != S_OK) {
160                 fprintf(stderr, "Failed to set video output connection for card %u\n", card_index);
161                 abort();
162         }
163         if (config->SetFlag(bmdDeckLinkConfigOutput1080pAsPsF, true) != S_OK) {
164                 fprintf(stderr, "Failed to set PsF flag for card\n");
165                 abort();
166         }
167         if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) {
168                 // This affects at least some no-name SDI->HDMI converters.
169                 // Warn, but don't die.
170                 fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n");
171         }
172
173         BMDDisplayModeSupport support;
174         IDeckLinkDisplayMode *display_mode;
175         BMDPixelFormat pixel_format = global_flags.bit_depth > 8 ? bmdFormat10BitYUV : bmdFormat8BitYUV;
176         if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
177                                          &support, &display_mode) != S_OK) {
178                 fprintf(stderr, "Couldn't ask for format support\n");
179                 abort();
180         }
181
182         if (support == bmdDisplayModeNotSupported) {
183                 fprintf(stderr, "Requested display mode not supported\n");
184                 abort();
185         }
186
187         current_mode_flags = display_mode->GetFlags();
188
189         BMDTimeValue time_value;
190         BMDTimeScale time_scale;
191         if (display_mode->GetFrameRate(&time_value, &time_scale) != S_OK) {
192                 fprintf(stderr, "Couldn't get frame rate\n");
193                 abort();
194         }
195
196         metric_decklink_output_width_pixels = width;
197         metric_decklink_output_height_pixels = height;
198         metric_decklink_output_frame_rate_nom = time_value;
199         metric_decklink_output_frame_rate_den = time_scale;
200
201         frame_duration = time_value * TIMEBASE / time_scale;
202
203         display_mode->Release();
204
205         if (input != nullptr) {
206                 if (input->DisableVideoInput() != S_OK) {
207                         fprintf(stderr, "Warning: Failed to disable video input for card %d\n", card_index);
208                 }
209                 if (input->DisableAudioInput() != S_OK) {
210                         fprintf(stderr, "Warning: Failed to disable audio input for card %d\n", card_index);
211                 }
212         }
213
214         HRESULT result = output->EnableVideoOutput(mode, bmdVideoOutputFlagDefault);
215         if (result != S_OK) {
216                 fprintf(stderr, "Couldn't enable output with error 0x%x\n", result);
217                 abort();
218         }
219         if (output->SetScheduledFrameCompletionCallback(this) != S_OK) {
220                 fprintf(stderr, "Couldn't set callback\n");
221                 abort();
222         }
223         assert(OUTPUT_FREQUENCY == 48000);
224         if (output->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, 2, bmdAudioOutputStreamTimestamped) != S_OK) {
225                 fprintf(stderr, "Couldn't enable audio output\n");
226                 abort();
227         }
228         if (is_master_card) {
229                 if (output->BeginAudioPreroll() != S_OK) {
230                         fprintf(stderr, "Couldn't begin audio preroll\n");
231                         abort();
232                 }
233         } else {
234                 playback_started = true;
235         }
236
237         present_thread = thread([this]{
238                 QOpenGLContext *context = create_context(this->surface);
239                 eglBindAPI(EGL_OPENGL_API);
240                 if (!make_current(context, this->surface)) {
241                         printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(),
242                                 eglGetError());
243                         abort();
244                 }
245                 present_thread_func();
246                 delete_context(context);
247         });
248 }
249
250 void DeckLinkOutput::end_output()
251 {
252         if (!playback_initiated) {
253                 return;
254         }
255
256         should_quit.quit();
257         frame_queues_changed.notify_all();
258         present_thread.join();
259         playback_initiated = false;
260
261         if (is_master_card) {
262                 output->StopScheduledPlayback(0, nullptr, 0);
263         }
264         output->DisableVideoOutput();
265         output->DisableAudioOutput();
266
267         // Wait until all frames are accounted for, and free them.
268         {
269                 unique_lock<mutex> lock(frame_queue_mutex);
270                 while (!(frame_freelist.empty() && scheduled_frames.empty())) {
271                         frame_queues_changed.wait(lock, [this]{ return !frame_freelist.empty(); });
272                         frame_freelist.pop();
273                 }
274         }
275 }
276
277 void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
278 {
279         assert(!should_quit.should_quit());
280
281         if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) {
282                 if (!last_frame_had_mode_mismatch) {
283                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
284                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
285                 }
286                 last_frame_had_mode_mismatch = true;
287                 ++metric_decklink_output_color_mismatch_frames;
288         } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) {
289                 if (!last_frame_had_mode_mismatch) {
290                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
291                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
292                 }
293                 last_frame_had_mode_mismatch = true;
294                 ++metric_decklink_output_color_mismatch_frames;
295         } else {
296                 last_frame_had_mode_mismatch = false;
297         }
298
299         unique_ptr<Frame> frame = get_frame();
300         if (global_flags.bit_depth > 8) {
301                 chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
302         } else {
303                 chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
304         }
305
306         // Download the UYVY texture to the PBO.
307         glPixelStorei(GL_PACK_ROW_LENGTH, 0);
308         check_error();
309
310         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
311         check_error();
312
313         if (global_flags.bit_depth > 8) {
314                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
315                 check_error();
316                 glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
317                 check_error();
318         } else {
319                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
320                 check_error();
321                 glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
322                 check_error();
323         }
324
325         glBindTexture(GL_TEXTURE_2D, 0);
326         check_error();
327         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
328         check_error();
329
330         glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
331         check_error();
332
333         frame->fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
334         check_error();
335         glFlush();  // Make the DeckLink thread see the fence as soon as possible.
336         check_error();
337
338         frame->input_frames = input_frames;
339         frame->received_ts = find_received_timestamp(input_frames);
340         frame->pts = pts;
341         frame->duration = duration;
342
343         {
344                 lock_guard<mutex> lock(frame_queue_mutex);
345                 pending_video_frames.push(move(frame));
346         }
347         frame_queues_changed.notify_all();
348 }
349
350 void DeckLinkOutput::send_audio(int64_t pts, const std::vector<float> &samples)
351 {
352         unique_ptr<int32_t[]> int_samples(new int32_t[samples.size()]);
353         for (size_t i = 0; i < samples.size(); ++i) {
354                 int_samples[i] = lrintf(samples[i] * 2147483648.0f);
355         }
356
357         uint32_t frames_written;
358         HRESULT result;
359         if (is_master_card) {
360                 result = output->ScheduleAudioSamples(int_samples.get(), samples.size() / 2,
361                         pts, TIMEBASE, &frames_written);
362         } else {
363                 result = output->WriteAudioSamplesSync(int_samples.get(), samples.size() / 2,
364                         &frames_written);
365         }
366         if (result != S_OK) {
367                 fprintf(stderr, "write audio to DeckLink (pts=%" PRId64 ") failed (result=0x%08x)\n", pts, result);
368         } else {
369                 // Non-master card is not really synchronized on audio at all, so we don't warn on it.
370                 if (frames_written != samples.size() / 2 && is_master_card) {
371                         fprintf(stderr, "write audio to DeckLink returned short write (%u/%zu)\n", frames_written, samples.size() / 2);
372                 }
373         }
374         metric_decklink_output_scheduled_samples += samples.size() / 2;
375 }
376
377 void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp)
378 {
379         assert(!should_quit.should_quit());
380
381         *dropped_frames = 0;
382         *frame_duration = this->frame_duration;
383
384         const BMDTimeValue buffer = lrint(*frame_duration * global_flags.output_buffer_frames);
385         const BMDTimeValue max_overshoot = lrint(*frame_duration * global_flags.output_slop_frames);
386         BMDTimeValue target_time = pts - buffer;
387
388         // While prerolling, we send out frames as quickly as we can.
389         if (target_time < base_pts) {
390                 *is_preroll = true;
391                 ++metric_decklink_output_scheduled_frames_preroll;
392                 return;
393         }
394
395         *is_preroll = !playback_started;
396
397         if (!playback_started) {
398                 if (output->EndAudioPreroll() != S_OK) {
399                         fprintf(stderr, "Could not end audio preroll\n");
400                         abort();  // TODO
401                 }
402                 if (output->StartScheduledPlayback(base_pts, TIMEBASE, 1.0) != S_OK) {
403                         fprintf(stderr, "Could not start playback\n");
404                         abort();  // TODO
405                 }
406                 playback_started = true;
407         }
408
409         BMDTimeValue stream_frame_time;
410         double playback_speed;
411         output->GetScheduledStreamTime(TIMEBASE, &stream_frame_time, &playback_speed);
412
413         *frame_timestamp = steady_clock::now() +
414                 nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE);
415
416         metric_decklink_output_margin_seconds.count_event(
417                 (target_time - stream_frame_time) / double(TIMEBASE));
418
419         // If we're ahead of time, wait for the frame to (approximately) start.
420         if (stream_frame_time < target_time) {
421                 should_quit.sleep_until(*frame_timestamp);
422                 ++metric_decklink_output_scheduled_frames_normal;
423                 return;
424         }
425
426         // If we overshot the previous frame by just a little,
427         // fire off one immediately.
428         if (stream_frame_time < target_time + max_overshoot) {
429                 fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n",
430                         lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE));
431                 ++metric_decklink_output_scheduled_frames_late;
432                 return;
433         }
434
435         // Oops, we missed by more than one frame. Return immediately,
436         // but drop so that we catch up.
437         *dropped_frames = (stream_frame_time - target_time + *frame_duration - 1) / *frame_duration;
438         const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE;
439         *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame);
440         fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames);
441         metric_decklink_output_scheduled_frames_dropped += *dropped_frames;
442         ++metric_decklink_output_scheduled_frames_normal;
443 }
444
445 uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
446 {
447         if (video_modes.count(mode)) {
448                 return mode;
449         }
450
451         // Prioritize 59.94 > 60 > 29.97. If none of those are found, then pick the highest one.
452         for (const pair<int, int> &desired : vector<pair<int, int>>{ { 60000, 1001 }, { 60, 1 }, { 30000, 1001 } }) {
453                 for (const auto &it : video_modes) {
454                         if (it.second.frame_rate_num * desired.second == desired.first * it.second.frame_rate_den) {
455                                 return it.first;
456                         }
457                 }
458         }
459
460         uint32_t best_mode = 0;
461         double best_fps = 0.0;
462         for (const auto &it : video_modes) {
463                 double fps = double(it.second.frame_rate_num) / it.second.frame_rate_den;
464                 if (fps > best_fps) {
465                         best_mode = it.first;
466                         best_fps = fps;
467                 }
468         }
469         return best_mode;
470 }
471
472 YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const
473 {
474         if (current_mode_flags & bmdDisplayModeColorspaceRec601) {
475                 return YCBCR_REC_601;
476         } else {
477                 // Don't bother checking bmdDisplayModeColorspaceRec709;
478                 // if none is set, 709 is a good default anyway.
479                 return YCBCR_REC_709;
480         }
481 }
482
483 HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result)
484 {
485         Frame *frame = static_cast<Frame *>(completedFrame);
486         switch (result) {
487         case bmdOutputFrameCompleted:
488                 ++metric_decklink_output_completed_frames_completed;
489                 break;
490         case bmdOutputFrameDisplayedLate:
491                 fprintf(stderr, "Output frame displayed late (pts=%" PRId64 ")\n", frame->pts);
492                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
493                 ++metric_decklink_output_completed_frames_late;
494                 break;
495         case bmdOutputFrameDropped:
496                 fprintf(stderr, "Output frame was dropped (pts=%" PRId64 ")\n", frame->pts);
497                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
498                 ++metric_decklink_output_completed_frames_dropped;
499                 break;
500         case bmdOutputFrameFlushed:
501                 fprintf(stderr, "Output frame was flushed (pts=%" PRId64 ")\n", frame->pts);
502                 ++metric_decklink_output_completed_frames_flushed;
503                 break;
504         default:
505                 fprintf(stderr, "Output frame completed with unknown status %d\n", result);
506                 ++metric_decklink_output_completed_frames_unknown;
507                 break;
508         }
509
510         static int frameno = 0;
511         print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram);
512
513         {
514                 lock_guard<mutex> lock(frame_queue_mutex);
515                 frame_freelist.push(unique_ptr<Frame>(frame));
516                 assert(scheduled_frames.count(frame));
517                 scheduled_frames.erase(frame);
518                 --metric_decklink_output_inflight_frames;
519         }
520
521         return S_OK;
522 }
523
524 HRESULT DeckLinkOutput::ScheduledPlaybackHasStopped()
525 {
526         printf("playback stopped!\n");
527         return S_OK;
528 }
529
530 unique_ptr<DeckLinkOutput::Frame> DeckLinkOutput::get_frame()
531 {
532         lock_guard<mutex> lock(frame_queue_mutex);
533
534         if (!frame_freelist.empty()) {
535                 unique_ptr<Frame> frame = move(frame_freelist.front());
536                 frame_freelist.pop();
537                 return frame;
538         }
539
540         unique_ptr<Frame> frame(new Frame);
541
542         size_t stride;
543         if (global_flags.bit_depth > 8) {
544                 stride = v210Converter::get_v210_stride(width);
545                 GLint v210_width = stride / sizeof(uint32_t);
546                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
547
548                 // We need valid texture state, or NVIDIA won't allow us to write to the texture.
549                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
550                 check_error();
551                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
552                 check_error();
553         } else {
554                 stride = width * 2;
555                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
556         }
557
558         glGenBuffers(1, &frame->pbo);
559         check_error();
560         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
561         check_error();
562         glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
563         check_error();
564         frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
565         check_error();
566         frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
567         frame->resource_pool = resource_pool;
568
569         return frame;
570 }
571
572 void DeckLinkOutput::present_thread_func()
573 {
574         pthread_setname_np(pthread_self(), "DeckLinkOutput");
575         for ( ;; ) {
576                 unique_ptr<Frame> frame;
577                 {
578                         unique_lock<mutex> lock(frame_queue_mutex);
579                         frame_queues_changed.wait(lock, [this]{
580                                 return should_quit.should_quit() || !pending_video_frames.empty();
581                         });
582                         if (should_quit.should_quit()) {
583                                 return;
584                         }
585                         frame = move(pending_video_frames.front());
586                         pending_video_frames.pop();
587                 }
588
589                 for ( ;; ) {
590                         int err = glClientWaitSync(frame->fence.get(), /*flags=*/0, 0);
591                         if (err == GL_TIMEOUT_EXPIRED) {
592                                 // NVIDIA likes to busy-wait; yield instead.
593                                 this_thread::sleep_for(milliseconds(1));
594                         } else {
595                                 break;
596                         }
597                 }
598                 check_error();
599                 frame->fence.reset();
600
601                 if (global_flags.bit_depth > 8) {
602                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
603                 } else {
604                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
605                 }
606
607                 // Release any input frames we needed to render this frame.
608                 frame->input_frames.clear();
609
610                 if (is_master_card) {
611                         BMDTimeValue pts = frame->pts;
612                         BMDTimeValue duration = frame->duration;
613                         HRESULT res = output->ScheduleVideoFrame(frame.get(), pts, duration, TIMEBASE);
614                         lock_guard<mutex> lock(frame_queue_mutex);
615                         if (res == S_OK) {
616                                 scheduled_frames.insert(frame.release());  // Owned by the driver now.
617                                 ++metric_decklink_output_inflight_frames;
618                         } else {
619                                 fprintf(stderr, "Could not schedule video frame! (error=0x%08x)\n", res);
620
621                                 frame_freelist.push(move(frame));
622                         }
623                 } else {
624                         HRESULT res = output->DisplayVideoFrameSync(frame.get());
625                         if (res != S_OK) {
626                                 fprintf(stderr, "Could not schedule video frame! (error=0x%08x)\n", res);
627                         }
628                         frame_freelist.push(move(frame));
629                 }
630         }
631 }
632
633 HRESULT STDMETHODCALLTYPE DeckLinkOutput::QueryInterface(REFIID, LPVOID *)
634 {
635         return E_NOINTERFACE;
636 }
637
638 ULONG STDMETHODCALLTYPE DeckLinkOutput::AddRef()
639 {
640         return refcount.fetch_add(1) + 1;
641 }
642
643 ULONG STDMETHODCALLTYPE DeckLinkOutput::Release()
644 {
645         int new_ref = refcount.fetch_sub(1) - 1;
646         if (new_ref == 0)
647                 delete this;
648         return new_ref;
649 }
650
651 DeckLinkOutput::Frame::~Frame()
652 {
653         glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
654         check_error();
655         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
656         check_error();
657         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
658         check_error();
659         glDeleteBuffers(1, &pbo);
660         check_error();
661         resource_pool->release_2d_texture(uyvy_tex);
662         check_error();
663 }
664
665 HRESULT STDMETHODCALLTYPE DeckLinkOutput::Frame::QueryInterface(REFIID, LPVOID *)
666 {
667         return E_NOINTERFACE;
668 }
669
670 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::AddRef()
671 {
672         return refcount.fetch_add(1) + 1;
673 }
674
675 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::Release()
676 {
677         int new_ref = refcount.fetch_sub(1) - 1;
678         if (new_ref == 0)
679                 delete this;
680         return new_ref;
681 }
682
683 long DeckLinkOutput::Frame::GetWidth()
684 {
685         return global_flags.width;
686 }
687
688 long DeckLinkOutput::Frame::GetHeight()
689 {
690         return global_flags.height;
691 }
692
693 long DeckLinkOutput::Frame::GetRowBytes()
694 {
695         if (global_flags.bit_depth > 8) {
696                 return v210Converter::get_v210_stride(global_flags.width);
697         } else {
698                 return global_flags.width * 2;
699         }
700 }
701
702 BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
703 {
704         if (global_flags.bit_depth > 8) {
705                 return bmdFormat10BitYUV;
706         } else {
707                 return bmdFormat8BitYUV;
708         }
709 }
710
711 BMDFrameFlags DeckLinkOutput::Frame::GetFlags()
712 {
713         return bmdFrameFlagDefault;
714 }
715
716 HRESULT DeckLinkOutput::Frame::GetBytes(/* out */ void **buffer)
717 {
718         *buffer = uyvy_ptr_local.get();
719         return S_OK;
720 }
721
722 HRESULT DeckLinkOutput::Frame::GetTimecode(/* in */ BMDTimecodeFormat format, /* out */ IDeckLinkTimecode **timecode)
723 {
724         fprintf(stderr, "STUB: GetTimecode()\n");
725         return E_NOTIMPL;
726 }
727
728 HRESULT DeckLinkOutput::Frame::GetAncillaryData(/* out */ IDeckLinkVideoFrameAncillary **ancillary)
729 {
730         fprintf(stderr, "STUB: GetAncillaryData()\n");
731         return E_NOTIMPL;
732 }