]> git.sesse.net Git - nageru/blob - decklink_output.cpp
Add exported metrics for all the latency measurements.
[nageru] / decklink_output.cpp
1 #include <movit/effect_util.h>
2 #include <movit/util.h>
3 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
4 #include <pthread.h>
5
6 #include <epoxy/egl.h>
7
8 #include "chroma_subsampler.h"
9 #include "decklink_output.h"
10 #include "decklink_util.h"
11 #include "flags.h"
12 #include "print_latency.h"
13 #include "resource_pool.h"
14 #include "timebase.h"
15 #include "v210_converter.h"
16
17 using namespace movit;
18 using namespace std;
19 using namespace std::chrono;
20
21 DeckLinkOutput::DeckLinkOutput(ResourcePool *resource_pool, QSurface *surface, unsigned width, unsigned height, unsigned card_index)
22         : resource_pool(resource_pool), surface(surface), width(width), height(height), card_index(card_index)
23 {
24         chroma_subsampler.reset(new ChromaSubsampler(resource_pool));
25         latency_histogram.init("decklink_output");
26 }
27
28 void DeckLinkOutput::set_device(IDeckLink *decklink)
29 {
30         if (decklink->QueryInterface(IID_IDeckLinkOutput, (void**)&output) != S_OK) {
31                 fprintf(stderr, "Card %u has no outputs\n", card_index);
32                 exit(1);
33         }
34
35         IDeckLinkDisplayModeIterator *mode_it;
36         if (output->GetDisplayModeIterator(&mode_it) != S_OK) {
37                 fprintf(stderr, "Failed to enumerate output display modes for card %u\n", card_index);
38                 exit(1);
39         }
40
41         video_modes.clear();
42
43         for (const auto &it : summarize_video_modes(mode_it, card_index)) {
44                 if (it.second.width != width || it.second.height != height) {
45                         continue;
46                 }
47
48                 // We could support interlaced modes, but let's stay out of it for now,
49                 // since we don't have interlaced stream output.
50                 if (it.second.interlaced) {
51                         continue;
52                 }
53
54                 video_modes.insert(it);
55         }
56
57         mode_it->Release();
58
59         // HDMI or SDI generally mean “both HDMI and SDI at the same time” on DeckLink cards
60         // that support both; pick_default_video_connection() will generally pick one of those
61         // if they exist. We're not very likely to need analog outputs, so we don't need a way
62         // to change beyond that.
63         video_connection = pick_default_video_connection(decklink, BMDDeckLinkVideoOutputConnections, card_index);
64 }
65
66 void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts)
67 {
68         assert(output);
69         assert(!playback_initiated);
70
71         if (video_modes.empty()) {
72                 fprintf(stderr, "ERROR: No matching output modes for %dx%d found\n", width, height);
73                 exit(1);
74         }
75
76         should_quit.unquit();
77         playback_initiated = true;
78         playback_started = false;
79         this->base_pts = base_pts;
80
81         IDeckLinkConfiguration *config = nullptr;
82         if (output->QueryInterface(IID_IDeckLinkConfiguration, (void**)&config) != S_OK) {
83                 fprintf(stderr, "Failed to get configuration interface for output card\n");
84                 exit(1);
85         }
86         if (config->SetFlag(bmdDeckLinkConfigLowLatencyVideoOutput, true) != S_OK) {
87                 fprintf(stderr, "Failed to set low latency output\n");
88                 exit(1);
89         }
90         if (config->SetInt(bmdDeckLinkConfigVideoOutputConnection, video_connection) != S_OK) {
91                 fprintf(stderr, "Failed to set video output connection for card %u\n", card_index);
92                 exit(1);
93         }
94         if (config->SetFlag(bmdDeckLinkConfigUse1080pNotPsF, true) != S_OK) {
95                 fprintf(stderr, "Failed to set PsF flag for card\n");
96                 exit(1);
97         }
98         if (config->SetFlag(bmdDeckLinkConfigSMPTELevelAOutput, true) != S_OK) {
99                 // This affects at least some no-name SDI->HDMI converters.
100                 // Warn, but don't die.
101                 fprintf(stderr, "WARNING: Failed to enable SMTPE Level A; resolutions like 1080p60 might have issues.\n");
102         }
103
104         BMDDisplayModeSupport support;
105         IDeckLinkDisplayMode *display_mode;
106         BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV;
107         if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
108                                          &support, &display_mode) != S_OK) {
109                 fprintf(stderr, "Couldn't ask for format support\n");
110                 exit(1);
111         }
112
113         if (support == bmdDisplayModeNotSupported) {
114                 fprintf(stderr, "Requested display mode not supported\n");
115                 exit(1);
116         }
117
118         current_mode_flags = display_mode->GetFlags();
119
120         BMDTimeValue time_value;
121         BMDTimeScale time_scale;
122         if (display_mode->GetFrameRate(&time_value, &time_scale) != S_OK) {
123                 fprintf(stderr, "Couldn't get frame rate\n");
124                 exit(1);
125         }
126
127         frame_duration = time_value * TIMEBASE / time_scale;
128
129         display_mode->Release();
130
131         HRESULT result = output->EnableVideoOutput(mode, bmdVideoOutputFlagDefault);
132         if (result != S_OK) {
133                 fprintf(stderr, "Couldn't enable output with error 0x%x\n", result);
134                 exit(1);
135         }
136         if (output->SetScheduledFrameCompletionCallback(this) != S_OK) {
137                 fprintf(stderr, "Couldn't set callback\n");
138                 exit(1);
139         }
140         assert(OUTPUT_FREQUENCY == 48000);
141         if (output->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, 2, bmdAudioOutputStreamTimestamped) != S_OK) {
142                 fprintf(stderr, "Couldn't enable audio output\n");
143                 exit(1);
144         }
145         if (output->BeginAudioPreroll() != S_OK) {
146                 fprintf(stderr, "Couldn't begin audio preroll\n");
147                 exit(1);
148         }
149
150         present_thread = thread([this]{
151                 QOpenGLContext *context = create_context(this->surface);
152                 eglBindAPI(EGL_OPENGL_API);
153                 if (!make_current(context, this->surface)) {
154                         printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(),
155                                 eglGetError());
156                         exit(1);
157                 }
158                 present_thread_func();
159                 delete_context(context);
160         });
161 }
162
163 void DeckLinkOutput::end_output()
164 {
165         if (!playback_initiated) {
166                 return;
167         }
168
169         should_quit.quit();
170         frame_queues_changed.notify_all();
171         present_thread.join();
172         playback_initiated = false;
173
174         output->StopScheduledPlayback(0, nullptr, 0);
175         output->DisableVideoOutput();
176         output->DisableAudioOutput();
177
178         // Wait until all frames are accounted for, and free them.
179         {
180                 unique_lock<mutex> lock(frame_queue_mutex);
181                 while (!(frame_freelist.empty() && num_frames_in_flight == 0)) {
182                         frame_queues_changed.wait(lock, [this]{ return !frame_freelist.empty(); });
183                         frame_freelist.pop();
184                 }
185         }
186 }
187
188 void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, int64_t pts, int64_t duration)
189 {
190         assert(!should_quit.should_quit());
191
192         if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) {
193                 if (!last_frame_had_mode_mismatch) {
194                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n");
195                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec601 (or =auto).\n");
196                 }
197                 last_frame_had_mode_mismatch = true;
198         } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) {
199                 if (!last_frame_had_mode_mismatch) {
200                         fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n");
201                         fprintf(stderr, "         Consider --output-ycbcr-coefficients=rec709 (or =auto).\n");
202                 }
203                 last_frame_had_mode_mismatch = true;
204         } else {
205                 last_frame_had_mode_mismatch = false;
206         }
207
208         unique_ptr<Frame> frame = move(get_frame());
209         if (global_flags.ten_bit_output) {
210                 chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
211         } else {
212                 chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
213         }
214
215         // Download the UYVY texture to the PBO.
216         glPixelStorei(GL_PACK_ROW_LENGTH, 0);
217         check_error();
218
219         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
220         check_error();
221
222         if (global_flags.ten_bit_output) {
223                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
224                 check_error();
225                 glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
226                 check_error();
227         } else {
228                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
229                 check_error();
230                 glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
231                 check_error();
232         }
233
234         glBindTexture(GL_TEXTURE_2D, 0);
235         check_error();
236         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
237         check_error();
238
239         glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
240         check_error();
241
242         frame->fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
243         check_error();
244         glFlush();  // Make the DeckLink thread see the fence as soon as possible.
245         check_error();
246
247         frame->input_frames = input_frames;
248         frame->received_ts = find_received_timestamp(input_frames);
249         frame->pts = pts;
250         frame->duration = duration;
251
252         {
253                 unique_lock<mutex> lock(frame_queue_mutex);
254                 pending_video_frames.push(move(frame));
255         }
256         frame_queues_changed.notify_all();
257 }
258
259 void DeckLinkOutput::send_audio(int64_t pts, const std::vector<float> &samples)
260 {
261         unique_ptr<int32_t[]> int_samples(new int32_t[samples.size()]);
262         for (size_t i = 0; i < samples.size(); ++i) {
263                 int_samples[i] = lrintf(samples[i] * 2147483648.0f);
264         }
265
266         uint32_t frames_written;
267         HRESULT result = output->ScheduleAudioSamples(int_samples.get(), samples.size() / 2,
268                 pts, TIMEBASE, &frames_written);
269         if (result != S_OK) {
270                 fprintf(stderr, "ScheduleAudioSamples(pts=%ld) failed (result=0x%08x)\n", pts, result);
271         } else {
272                 if (frames_written != samples.size() / 2) {
273                         fprintf(stderr, "ScheduleAudioSamples() returned short write (%u/%ld)\n", frames_written, samples.size() / 2);
274                 }
275         }
276 }
277
278 void DeckLinkOutput::wait_for_frame(int64_t pts, int *dropped_frames, int64_t *frame_duration, bool *is_preroll, steady_clock::time_point *frame_timestamp)
279 {
280         assert(!should_quit.should_quit());
281
282         *dropped_frames = 0;
283         *frame_duration = this->frame_duration;
284
285         const BMDTimeValue buffer = lrint(*frame_duration * global_flags.output_buffer_frames);
286         const BMDTimeValue max_overshoot = lrint(*frame_duration * global_flags.output_slop_frames);
287         BMDTimeValue target_time = pts - buffer;
288
289         // While prerolling, we send out frames as quickly as we can.
290         if (target_time < base_pts) {
291                 *is_preroll = true;
292                 return;
293         }
294
295         *is_preroll = !playback_started;
296
297         if (!playback_started) {
298                 if (output->EndAudioPreroll() != S_OK) {
299                         fprintf(stderr, "Could not end audio preroll\n");
300                         exit(1);  // TODO
301                 }
302                 if (output->StartScheduledPlayback(base_pts, TIMEBASE, 1.0) != S_OK) {
303                         fprintf(stderr, "Could not start playback\n");
304                         exit(1);  // TODO
305                 }
306                 playback_started = true;
307         }
308
309         BMDTimeValue stream_frame_time;
310         double playback_speed;
311         output->GetScheduledStreamTime(TIMEBASE, &stream_frame_time, &playback_speed);
312
313         *frame_timestamp = steady_clock::now() +
314                 nanoseconds((target_time - stream_frame_time) * 1000000000 / TIMEBASE);
315
316         // If we're ahead of time, wait for the frame to (approximately) start.
317         if (stream_frame_time < target_time) {
318                 should_quit.sleep_until(*frame_timestamp);
319                 return;
320         }
321
322         // If we overshot the previous frame by just a little,
323         // fire off one immediately.
324         if (stream_frame_time < target_time + max_overshoot) {
325                 fprintf(stderr, "Warning: Frame was %ld ms late (but not skipping it due to --output-slop-frames).\n",
326                         lrint((stream_frame_time - target_time) * 1000.0 / TIMEBASE));
327                 return;
328         }
329
330         // Oops, we missed by more than one frame. Return immediately,
331         // but drop so that we catch up.
332         *dropped_frames = (stream_frame_time - target_time + *frame_duration - 1) / *frame_duration;
333         const int64_t ns_per_frame = this->frame_duration * 1000000000 / TIMEBASE;
334         *frame_timestamp += nanoseconds(*dropped_frames * ns_per_frame);
335         fprintf(stderr, "Dropped %d output frames; skipping.\n", *dropped_frames);
336 }
337
338 uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const
339 {
340         if (video_modes.count(mode)) {
341                 return mode;
342         }
343
344         // Prioritize 59.94 > 60 > 29.97. If none of those are found, then pick the highest one.
345         for (const pair<int, int> &desired : vector<pair<int, int>>{ { 60000, 1001 }, { 60, 0 }, { 30000, 1001 } }) {
346                 for (const auto &it : video_modes) {
347                         if (it.second.frame_rate_num * desired.second == desired.first * it.second.frame_rate_den) {
348                                 return it.first;
349                         }
350                 }
351         }
352
353         uint32_t best_mode = 0;
354         double best_fps = 0.0;
355         for (const auto &it : video_modes) {
356                 double fps = double(it.second.frame_rate_num) / it.second.frame_rate_den;
357                 if (fps > best_fps) {
358                         best_mode = it.first;
359                         best_fps = fps;
360                 }
361         }
362         return best_mode;
363 }
364
365 YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const
366 {
367         if (current_mode_flags & bmdDisplayModeColorspaceRec601) {
368                 return YCBCR_REC_601;
369         } else {
370                 // Don't bother checking bmdDisplayModeColorspaceRec709;
371                 // if none is set, 709 is a good default anyway.
372                 return YCBCR_REC_709;
373         }
374 }
375
376 HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result)
377 {
378         Frame *frame = static_cast<Frame *>(completedFrame);
379         switch (result) {
380         case bmdOutputFrameCompleted:
381                 break;
382         case bmdOutputFrameDisplayedLate:
383                 fprintf(stderr, "Output frame displayed late (pts=%ld)\n", frame->pts);
384                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
385                 break;
386         case bmdOutputFrameDropped:
387                 fprintf(stderr, "Output frame was dropped (pts=%ld)\n", frame->pts);
388                 fprintf(stderr, "Consider increasing --output-buffer-frames if this persists.\n");
389                 break;
390         case bmdOutputFrameFlushed:
391                 fprintf(stderr, "Output frame was flushed (pts=%ld)\n", frame->pts);
392                 break;
393         default:
394                 fprintf(stderr, "Output frame completed with unknown status %d\n", result);
395                 break;
396         }
397
398         static int frameno = 0;
399         print_latency("DeckLink output latency (frame received → output on HDMI):", frame->received_ts, false, &frameno, &latency_histogram);
400
401         {
402                 lock_guard<mutex> lock(frame_queue_mutex);
403                 frame_freelist.push(unique_ptr<Frame>(frame));
404                 --num_frames_in_flight;
405         }
406
407         return S_OK;
408 }
409
410 HRESULT DeckLinkOutput::ScheduledPlaybackHasStopped()
411 {
412         printf("playback stopped!\n");
413         return S_OK;
414 }
415
416 unique_ptr<DeckLinkOutput::Frame> DeckLinkOutput::get_frame()
417 {
418         lock_guard<mutex> lock(frame_queue_mutex);
419
420         if (!frame_freelist.empty()) {
421                 unique_ptr<Frame> frame = move(frame_freelist.front());
422                 frame_freelist.pop();
423                 return frame;
424         }
425
426         unique_ptr<Frame> frame(new Frame);
427
428         size_t stride;
429         if (global_flags.ten_bit_output) {
430                 stride = v210Converter::get_v210_stride(width);
431                 GLint v210_width = stride / sizeof(uint32_t);
432                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
433
434                 // We need valid texture state, or NVIDIA won't allow us to write to the texture.
435                 glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
436                 check_error();
437                 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
438                 check_error();
439         } else {
440                 stride = width * 2;
441                 frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
442         }
443
444         glGenBuffers(1, &frame->pbo);
445         check_error();
446         glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
447         check_error();
448         glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
449         check_error();
450         frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
451         check_error();
452         frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
453         frame->resource_pool = resource_pool;
454
455         return frame;
456 }
457
458 void DeckLinkOutput::present_thread_func()
459 {
460         pthread_setname_np(pthread_self(), "DeckLinkOutput");
461         for ( ;; ) {
462                 unique_ptr<Frame> frame;
463                 {
464                         unique_lock<mutex> lock(frame_queue_mutex);
465                         frame_queues_changed.wait(lock, [this]{
466                                 return should_quit.should_quit() || !pending_video_frames.empty();
467                         });
468                         if (should_quit.should_quit()) {
469                                 return;
470                         }
471                         frame = move(pending_video_frames.front());
472                         pending_video_frames.pop();
473                         ++num_frames_in_flight;
474                 }
475
476                 glClientWaitSync(frame->fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
477                 check_error();
478                 frame->fence.reset();
479
480                 if (global_flags.ten_bit_output) {
481                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
482                 } else {
483                         memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
484                 }
485
486                 // Release any input frames we needed to render this frame.
487                 frame->input_frames.clear();
488
489                 BMDTimeValue pts = frame->pts;
490                 BMDTimeValue duration = frame->duration;
491                 HRESULT res = output->ScheduleVideoFrame(frame.get(), pts, duration, TIMEBASE);
492                 if (res == S_OK) {
493                         frame.release();  // Owned by the driver now.
494                 } else {
495                         fprintf(stderr, "Could not schedule video frame! (error=0x%08x)\n", res);
496
497                         lock_guard<mutex> lock(frame_queue_mutex);
498                         frame_freelist.push(move(frame));
499                         --num_frames_in_flight;
500                 }
501         }
502 }
503
504 HRESULT STDMETHODCALLTYPE DeckLinkOutput::QueryInterface(REFIID, LPVOID *)
505 {
506         return E_NOINTERFACE;
507 }
508
509 ULONG STDMETHODCALLTYPE DeckLinkOutput::AddRef()
510 {
511         return refcount.fetch_add(1) + 1;
512 }
513
514 ULONG STDMETHODCALLTYPE DeckLinkOutput::Release()
515 {
516         int new_ref = refcount.fetch_sub(1) - 1;
517         if (new_ref == 0)
518                 delete this;
519         return new_ref;
520 }
521
522 DeckLinkOutput::Frame::~Frame()
523 {
524         glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
525         check_error();
526         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
527         check_error();
528         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
529         check_error();
530         glDeleteBuffers(1, &pbo);
531         check_error();
532         resource_pool->release_2d_texture(uyvy_tex);
533         check_error();
534 }
535
536 HRESULT STDMETHODCALLTYPE DeckLinkOutput::Frame::QueryInterface(REFIID, LPVOID *)
537 {
538         return E_NOINTERFACE;
539 }
540
541 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::AddRef()
542 {
543         return refcount.fetch_add(1) + 1;
544 }
545
546 ULONG STDMETHODCALLTYPE DeckLinkOutput::Frame::Release()
547 {
548         int new_ref = refcount.fetch_sub(1) - 1;
549         if (new_ref == 0)
550                 delete this;
551         return new_ref;
552 }
553
554 long DeckLinkOutput::Frame::GetWidth()
555 {
556         return global_flags.width;
557 }
558
559 long DeckLinkOutput::Frame::GetHeight()
560 {
561         return global_flags.height;
562 }
563
564 long DeckLinkOutput::Frame::GetRowBytes()
565 {
566         if (global_flags.ten_bit_output) {
567                 return v210Converter::get_v210_stride(global_flags.width);
568         } else {
569                 return global_flags.width * 2;
570         }
571 }
572
573 BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
574 {
575         if (global_flags.ten_bit_output) {
576                 return bmdFormat10BitYUV;
577         } else {
578                 return bmdFormat8BitYUV;
579         }
580 }
581
582 BMDFrameFlags DeckLinkOutput::Frame::GetFlags()
583 {
584         return bmdFrameFlagDefault;
585 }
586
587 HRESULT DeckLinkOutput::Frame::GetBytes(/* out */ void **buffer)
588 {
589         *buffer = uyvy_ptr_local.get();
590         return S_OK;
591 }
592
593 HRESULT DeckLinkOutput::Frame::GetTimecode(/* in */ BMDTimecodeFormat format, /* out */ IDeckLinkTimecode **timecode)
594 {
595         fprintf(stderr, "STUB: GetTimecode()\n");
596         return E_NOTIMPL;
597 }
598
599 HRESULT DeckLinkOutput::Frame::GetAncillaryData(/* out */ IDeckLinkVideoFrameAncillary **ancillary)
600 {
601         fprintf(stderr, "STUB: GetAncillaryData()\n");
602         return E_NOTIMPL;
603 }