]> git.sesse.net Git - nageru/blob - decklink_output.cpp
Be slightly clearer about what happens when we try to enable output with no matching...
[nageru] / decklink_output.cpp
1 #include <movit/effect_util.h>
2 #include <movit/util.h>
3 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
4 #include <pthread.h>
5
6 #include <epoxy/egl.h>
7
8 #include "chroma_subsampler.h"
9 #include "decklink_output.h"
10 #include "decklink_util.h"
11 #include "flags.h"
12 #include "print_latency.h"
13 #include "resource_pool.h"
14 #include "timebase.h"
15 #include "v210_converter.h"
16
17 using namespace movit;
18 using namespace std;
19 using namespace std::chrono;
20
21 DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index)
22         : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
23 {
24         chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
25 }
26
27 void DeckLinkOutput::set_device(IDeckLink *decklink)
28 {
29         if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) {
30                 fprintf(stderr, "Card %u has no outputs\n", card_index);
31                 exit(1);
32         }
33
34         IDeckLinkDisplayModeIterator *mode_it;
35         if (output->GetDisplayModeIterator(&mode_it) != S_OK) {
36                 fprintf(stderr, "Failed to enumerate output display modes for card %u\n", card_index);
37                 exit(1);
38         }
39
40         video_modes.clear();
41
42         for (const auto &it : summarize_video_modes(mode_it, card_index)) {
43                 if (it.second.width != width || it.second.height != height) {
44                         continue;
45                 }
46
47                 // We could support interlaced modes, but let's stay out of it for now,
48                 // since we don't have interlaced stream output.
49                 if (it.second.interlaced) {
50                         continue;
51                 }
52
53                 video_modes.insert(it);
54         }
55
56         mode_it->Release();
57
58         // HDMI or SDI generally mean “both HDMI and SDI at the same time” on DeckLink cards
59         // that support both; pick_default_video_connection() will generally pick one of those
60         // if they exist. We're not very likely to need analog outputs, so we don't need a way
61         // to change beyond that.
62         video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index);
63 }
64
65 void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
66 {
67         assert(output);
68         assert(!playback_initiated);
69
70         if (video_modes.empty()) {
71                 fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height);
72                 exit(1);
73         }
74
75         should_quit.unquit();
76         playback_initiated = true;
77         playback_started = false;
78         this->base_pts = base_pts;
79
80         IDeckLinkConfiguration *config = nullptr;
81         if (output->QueryInterface(IID_IDeckLinkConfiguration, (void**)&config) != S_OK) {
82                 fprintf(stderr, "Failed to get configuration interface for output card\n");
83                 exit(1);
84         }
85         if (config->SetFlag(bmdDeckLinkConfigLowLatencyVideoOutput, true) != S_OK) {
86                 fprintf(stderr, "Failed to set low latency output\n");
87                 exit(1);
88         }
89         if (config->SetInt(bmdDeckLinkConfigVideoOutputConnection, video_connection) != S_OK) {
90                 fprintf(stderr, "Failed to set video output connection for card %u\n", card_index);
91                 exit(1);
92         }
93         if (config->SetFlag(bmdDeckLinkConfigUse1080pNotPsF, true) != S_OK) {
94                 fprintf(stderr, "Failed to set PsF flag for card\n");
95                 exit(1);
96         }
97         if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) {
98                 // This affects at least some no-name SDI->HDMI converters.
99                 // Warn, but don't die.
100                 fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n");
101         }
102
103         BMDDisplayModeSupport support;
104         IDeckLinkDisplayMode *display_mode;
105         BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV;
106         if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
107                                          &support, &display_mode) != S_OK) {
108                 fprintf(stderr, "Couldn't ask for format support\n");
109                 exit(1);
110         }
111
112         if (support == bmdDisplayModeNotSupported) {
113                 fprintf(stderr, "Requested display mode not supported\n");
114                 exit(1);
115         }
116
117         current_mode_flags = display_mode->GetFlags();
118
119         BMDTimeValue time_value;
120         BMDTimeScale time_scale;
121         if (display_mode->GetFrameRate(&time_value, &time_scale) != S_OK) {
122                 fprintf(stderr, "Couldn't get frame rate\n");
123                 exit(1);
124         }
125
126         frame_duration = time_value * TIMEBASE / time_scale;
127
128         display_mode->Release();
129
130         HRESULT result = output->EnableVideoOutput(mode, bmdVideoOutputFlagDefault);
131         if (result != S_OK) {
132                 fprintf(stderr, "Couldn't enable output with error 0x%x\n", result);
133                 exit(1);
134         }
135         if (output->SetScheduledFrameCompletionCallback(this) != S_OK) {
136                 fprintf(stderr, "Couldn't set callback\n");
137                 exit(1);
138         }
139         assert(OUTPUT_FREQUENCY == 48000);
140         if (output->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, 2, bmdAudioOutputStreamTimestamped) != S_OK) {
141                 fprintf(stderr, "Couldn't enable audio output\n");
142                 exit(1);
143         }
144         if (output->BeginAudioPreroll() != S_OK) {
145                 fprintf(stderr, "Couldn't begin audio preroll\n");
146                 exit(1);
147         }
148
149         present_thread = thread([this]{
150                 QOpenGLContext *context = create_context(this->surface);
151                 eglBindAPI(EGL_OPENGL_API);
152                 if (!make_current(context, this->surface)) {
153                         printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(),
154                                 eglGetError());
155                         exit(1);
156                 }
157                 present_thread_func();
158                 delete_context(context);
159         });
160 }
161
162 void DeckLinkOutput::end_output()
163 {
164         if (!playback_initiated) {
165                 return;
166         }
167
168         should_quit.quit();
169         frame_queues_changed.notify_all();
170         present_thread.join();
171         playback_initiated = false;
172
173         output->StopScheduledPlayback(0, nullptr, 0);
174         output->DisableVideoOutput();
175         output->DisableAudioOutput();
176
177         // Wait until all frames are accounted for, and free them.
178         {
179                 unique_lock<mutex> lock(frame_queue_mutex);
180                 while (!(frame_freelist.empty() && num_frames_in_flight == 0)) {
181                         frame_queues_changed.wait(lock, [this]{ return !frame_freelist.empty(); });
182                         frame_freelist.pop();
183                 }
184         }
185 }
186
187 void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
188 {
189         assert(!should_quit.should_quit());
190
191         if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) {
192                 if (!last_frame_had_mode_mismatch) {
193                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
194                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
195                 }
196                 last_frame_had_mode_mismatch = true;
197         } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) {
198                 if (!last_frame_had_mode_mismatch) {
199                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
200                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
201                 }
202                 last_frame_had_mode_mismatch = true;
203         } else {
204                 last_frame_had_mode_mismatch = false;
205         }
206
207         unique_ptr<Frame> frame = move(get_frame());
208         if (global_flags.ten_bit_output) {
209                 chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
210         } else {
211                 chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
212         }
213
214         // Download the UYVY texture to the PBO.
215         glPixelStorei(GL_PACK_ROW_LENGTH, 0);
216         check_error();
217
218         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
219         check_error();
220
221         if (global_flags.ten_bit_output) {
222                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
223                 check_error();
224                 glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
225                 check_error();
226         } else {
227                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
228                 check_error();
229                 glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
230                 check_error();
231         }
232
233         glBindTexture(GL_TEXTURE_2D, 0);
234         check_error();
235         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
236         check_error();
237
238         glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
239         check_error();
240
241         frame->fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
242         check_error();
243         glFlush();  // Make the DeckLink thread see the fence as soon as possible.
244         check_error();
245
246         frame->input_frames = input_frames;
247         frame->received_ts = find_received_timestamp(input_frames);
248         frame->pts = pts;
249         frame->duration = duration;
250
251         {
252                 unique_lock<mutex> lock(frame_queue_mutex);
253                 pending_video_frames.push(move(frame));
254         }
255         frame_queues_changed.notify_all();
256 }
257
258 void DeckLinkOutput::send_audio(int64_t pts, const std::vector<float> &samples)
259 {
260         unique_ptr<int32_t[]> int_samples(new int32_t[samples.size()]);
261         for (size_t i = 0; i < samples.size(); ++i) {
262                 int_samples[i] = lrintf(samples[i] * 2147483648.0f);
263         }
264
265         uint32_t frames_written;
266         HRESULT result = output->ScheduleAudioSamples(int_samples.get(), samples.size() / 2,
267                 pts, TIMEBASE, &frames_written);
268         if (result != S_OK) {
269                 fprintf(stderr, "ScheduleAudioSamples(pts=%ld) failed (result=0x%08x)\n", pts, result);
270         } else {
271                 if (frames_written != samples.size() / 2) {
272                         fprintf(stderr, "ScheduleAudioSamples() returned short write (%u/%ld)\n", frames_written, samples.size() / 2);
273                 }
274         }
275 }
276
277 void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp)
278 {
279         assert(!should_quit.should_quit());
280
281         *dropped_frames = 0;
282         *frame_duration = this->frame_duration;
283
284         const BMDTimeValue buffer = lrint(*frame_duration * global_flags.output_buffer_frames);
285         const BMDTimeValue max_overshoot = lrint(*frame_duration * global_flags.output_slop_frames);
286         BMDTimeValue target_time = pts - buffer;
287
288         // While prerolling, we send out frames as quickly as we can.
289         if (target_time < base_pts) {
290                 *is_preroll = true;
291                 return;
292         }
293
294         *is_preroll = !playback_started;
295
296         if (!playback_started) {
297                 if (output->EndAudioPreroll() != S_OK) {
298                         fprintf(stderr, "Could not end audio preroll\n");
299                         exit(1);  // TODO
300                 }
301                 if (output->StartScheduledPlayback(base_pts, TIMEBASE, 1.0) != S_OK) {
302                         fprintf(stderr, "Could not start playback\n");
303                         exit(1);  // TODO
304                 }
305                 playback_started = true;
306         }
307
308         BMDTimeValue stream_frame_time;
309         double playback_speed;
310         output->GetScheduledStreamTime(TIMEBASE, &stream_frame_time, &playback_speed);
311
312         *frame_timestamp = steady_clock::now() +
313                 nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE);
314
315         // If we're ahead of time, wait for the frame to (approximately) start.
316         if (stream_frame_time < target_time) {
317                 should_quit.sleep_until(*frame_timestamp);
318                 return;
319         }
320
321         // If we overshot the previous frame by just a little,
322         // fire off one immediately.
323         if (stream_frame_time < target_time + max_overshoot) {
324                 fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n",
325                         lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE));
326                 return;
327         }
328
329         // Oops, we missed by more than one frame. Return immediately,
330         // but drop so that we catch up.
331         *dropped_frames = (stream_frame_time - target_time + *frame_duration - 1) / *frame_duration;
332         const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE;
333         *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame);
334         fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames);
335 }
336
337 uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
338 {
339         if (video_modes.count(mode)) {
340                 return mode;
341         }
342
343         // Prioritize 59.94 > 60 > 29.97. If none of those are found, then pick the highest one.
344         for (const pair<int, int> &desired : vector<pair<int, int>>{ { 60000, 1001 }, { 60, 0 }, { 30000, 1001 } }) {
345                 for (const auto &it : video_modes) {
346                         if (it.second.frame_rate_num * desired.second == desired.first * it.second.frame_rate_den) {
347                                 return it.first;
348                         }
349                 }
350         }
351
352         uint32_t best_mode = 0;
353         double best_fps = 0.0;
354         for (const auto &it : video_modes) {
355                 double fps = double(it.second.frame_rate_num) / it.second.frame_rate_den;
356                 if (fps > best_fps) {
357                         best_mode = it.first;
358                         best_fps = fps;
359                 }
360         }
361         return best_mode;
362 }
363
364 YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const
365 {
366         if (current_mode_flags & bmdDisplayModeColorspaceRec601) {
367                 return YCBCR_REC_601;
368         } else {
369                 // Don't bother checking bmdDisplayModeColorspaceRec709;
370                 // if none is set, 709 is a good default anyway.
371                 return YCBCR_REC_709;
372         }
373 }
374
375 HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result)
376 {
377         Frame *frame = static_cast<Frame *>(completedFrame);
378         switch (result) {
379         case bmdOutputFrameCompleted:
380                 break;
381         case bmdOutputFrameDisplayedLate:
382                 fprintf(stderr, "Output frame displayed late (pts=%ld)\n", frame->pts);
383                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
384                 break;
385         case bmdOutputFrameDropped:
386                 fprintf(stderr, "Output frame was dropped (pts=%ld)\n", frame->pts);
387                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
388                 break;
389         case bmdOutputFrameFlushed:
390                 fprintf(stderr, "Output frame was flushed (pts=%ld)\n", frame->pts);
391                 break;
392         default:
393                 fprintf(stderr, "Output frame completed with unknown status %d\n", result);
394                 break;
395         }
396
397         static int frameno = 0;
398         print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno);
399
400         {
401                 lock_guard<mutex> lock(frame_queue_mutex);
402                 frame_freelist.push(unique_ptr<Frame>(frame));
403                 --num_frames_in_flight;
404         }
405
406         return S_OK;
407 }
408
409 HRESULT DeckLinkOutput::ScheduledPlaybackHasStopped()
410 {
411         printf("playback stopped!\n");
412         return S_OK;
413 }
414
415 unique_ptr<DeckLinkOutput::Frame> DeckLinkOutput::get_frame()
416 {
417         lock_guard<mutex> lock(frame_queue_mutex);
418
419         if (!frame_freelist.empty()) {
420                 unique_ptr<Frame> frame = move(frame_freelist.front());
421                 frame_freelist.pop();
422                 return frame;
423         }
424
425         unique_ptr<Frame> frame(new Frame);
426
427         size_t stride;
428         if (global_flags.ten_bit_output) {
429                 stride = v210Converter::get_v210_stride(width);
430                 GLint v210_width = stride / sizeof(uint32_t);
431                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
432
433                 // We need valid texture state, or NVIDIA won't allow us to write to the texture.
434                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
435                 check_error();
436                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
437                 check_error();
438         } else {
439                 stride = width * 2;
440                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
441         }
442
443         glGenBuffers(1, &frame->pbo);
444         check_error();
445         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
446         check_error();
447         glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
448         check_error();
449         frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
450         check_error();
451         frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
452         frame->resource_pool = resource_pool;
453
454         return frame;
455 }
456
457 void DeckLinkOutput::present_thread_func()
458 {
459         pthread_setname_np(pthread_self(), "DeckLinkOutput");
460         for ( ;; ) {
461                 unique_ptr<Frame> frame;
462                 {
463                         unique_lock<mutex> lock(frame_queue_mutex);
464                         frame_queues_changed.wait(lock, [this]{
465                                 return should_quit.should_quit() || !pending_video_frames.empty();
466                         });
467                         if (should_quit.should_quit()) {
468                                 return;
469                         }
470                         frame = move(pending_video_frames.front());
471                         pending_video_frames.pop();
472                         ++num_frames_in_flight;
473                 }
474
475                 glClientWaitSync(frame->fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
476                 check_error();
477                 frame->fence.reset();
478
479                 if (global_flags.ten_bit_output) {
480                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
481                 } else {
482                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
483                 }
484
485                 // Release any input frames we needed to render this frame.
486                 frame->input_frames.clear();
487
488                 BMDTimeValue pts = frame->pts;
489                 BMDTimeValue duration = frame->duration;
490                 HRESULT res = output->ScheduleVideoFrame(frame.get(), pts, duration, TIMEBASE);
491                 if (res == S_OK) {
492                         frame.release();  // Owned by the driver now.
493                 } else {
494                         fprintf(stderr, "Could not schedule video frame! (error=0x%08x)\n", res);
495
496                         lock_guard<mutex> lock(frame_queue_mutex);
497                         frame_freelist.push(move(frame));
498                         --num_frames_in_flight;
499                 }
500         }
501 }
502
503 HRESULT STDMETHODCALLTYPE DeckLinkOutput::QueryInterface(REFIID, LPVOID *)
504 {
505         return E_NOINTERFACE;
506 }
507
508 ULONG STDMETHODCALLTYPE DeckLinkOutput::AddRef()
509 {
510         return refcount.fetch_add(1) + 1;
511 }
512
513 ULONG STDMETHODCALLTYPE DeckLinkOutput::Release()
514 {
515         int new_ref = refcount.fetch_sub(1) - 1;
516         if (new_ref == 0)
517                 delete this;
518         return new_ref;
519 }
520
521 DeckLinkOutput::Frame::~Frame()
522 {
523         glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
524         check_error();
525         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
526         check_error();
527         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
528         check_error();
529         glDeleteBuffers(1, &pbo);
530         check_error();
531         resource_pool->release_2d_texture(uyvy_tex);
532         check_error();
533 }
534
535 HRESULT STDMETHODCALLTYPE DeckLinkOutput::Frame::QueryInterface(REFIID, LPVOID *)
536 {
537         return E_NOINTERFACE;
538 }
539
540 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::AddRef()
541 {
542         return refcount.fetch_add(1) + 1;
543 }
544
545 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::Release()
546 {
547         int new_ref = refcount.fetch_sub(1) - 1;
548         if (new_ref == 0)
549                 delete this;
550         return new_ref;
551 }
552
553 long DeckLinkOutput::Frame::GetWidth()
554 {
555         return global_flags.width;
556 }
557
558 long DeckLinkOutput::Frame::GetHeight()
559 {
560         return global_flags.height;
561 }
562
563 long DeckLinkOutput::Frame::GetRowBytes()
564 {
565         if (global_flags.ten_bit_output) {
566                 return v210Converter::get_v210_stride(global_flags.width);
567         } else {
568                 return global_flags.width * 2;
569         }
570 }
571
572 BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
573 {
574         if (global_flags.ten_bit_output) {
575                 return bmdFormat10BitYUV;
576         } else {
577                 return bmdFormat8BitYUV;
578         }
579 }
580
581 BMDFrameFlags DeckLinkOutput::Frame::GetFlags()
582 {
583         return bmdFrameFlagDefault;
584 }
585
586 HRESULT DeckLinkOutput::Frame::GetBytes(/* out */ void **buffer)
587 {
588         *buffer = uyvy_ptr_local.get();
589         return S_OK;
590 }
591
592 HRESULT DeckLinkOutput::Frame::GetTimecode(/* in */ BMDTimecodeFormat format, /* out */ IDeckLinkTimecode **timecode)
593 {
594         fprintf(stderr, "STUB: GetTimecode()\n");
595         return E_NOTIMPL;
596 }
597
598 HRESULT DeckLinkOutput::Frame::GetAncillaryData(/* out */ IDeckLinkVideoFrameAncillary **ancillary)
599 {
600         fprintf(stderr, "STUB: GetAncillaryData()\n");
601         return E_NOTIMPL;
602 }