]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Convert decode_video_format to return unsigned quantities.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <netinet/in.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #ifdef __SSE4_1__
17 #include <immintrin.h>
18 #endif
19 #include "bmusb.h"
20
21 #include <algorithm>
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <functional>
28 #include <memory>
29 #include <mutex>
30 #include <stack>
31 #include <thread>
32
33 using namespace std;
34 using namespace std::placeholders;
35
36 #define MIN_WIDTH 640
37 #define HEADER_SIZE 44
38 //#define HEADER_SIZE 0
39 #define AUDIO_HEADER_SIZE 4
40
41 #define FRAME_SIZE (8 << 20)  // 8 MB.
42 #define USB_VIDEO_TRANSFER_SIZE (128 << 10)  // 128 kB.
43
44 namespace {
45
46 FILE *audiofp;
47
48 thread usb_thread;
49 atomic<bool> should_quit;
50
// Returns the isochronous packet size, in bytes, to request for video of the
// given width: six lines at two bytes per pixel, rounded up to the next
// multiple of 1024 bytes.
int find_xfer_size_for_width(int width)
{
        const int bytes_per_line = width * 2;
        const int six_lines = bytes_per_line * 6;

        // Note: 10-bit input would need roughly 4/3 of this; that scaling is
        // intentionally not applied here.

        // Already a whole number of 1 kB units?
        if (six_lines % 1024 == 0) {
                return six_lines;
        }

        // Drop the partial unit, then add one full unit.
        return (six_lines & ~1023) + 1024;
}
65
66 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
67 {
68         assert(width >= MIN_WIDTH);
69         size_t size = find_xfer_size_for_width(width);
70         int num_iso_pack = xfr->length / size;
71         if (num_iso_pack != xfr->num_iso_packets ||
72             size != xfr->iso_packet_desc[0].length) {
73                 xfr->num_iso_packets = num_iso_pack;
74                 libusb_set_iso_packet_lengths(xfr, size);
75         }
76 }
77
78 }  // namespace
79
80 FrameAllocator::~FrameAllocator() {}
81
82 // Audio is more important than video, and also much cheaper.
83 // By having many more audio frames available, hopefully if something
84 // starts to drop, we'll have CPU load go down (from not having to
85 // process as much video) before we have to drop audio.
86 #define NUM_QUEUED_VIDEO_FRAMES 16
87 #define NUM_QUEUED_AUDIO_FRAMES 64
88
// FrameAllocator implementation backed by a pool of heap buffers.
// The constructor creates all buffers up front; alloc_frame() takes one off
// the free list and release_frame() puts it back.
89 class MallocFrameAllocator : public FrameAllocator {
90 public:
        // Creates <num_queued_frames> buffers of <frame_size> bytes each.
91         MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
92         Frame alloc_frame() override;
93         void release_frame(Frame frame) override;
94
95 private:
        // Size in bytes of every buffer created by the constructor.
96         size_t frame_size;
97
        // Held by alloc_frame() and release_frame() while they touch <freelist>.
98         mutex freelist_mutex;
99         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
100 };
101
102 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
103         : frame_size(frame_size)
104 {
105         for (size_t i = 0; i < num_queued_frames; ++i) {
106                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
107         }
108 }
109
110 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
111 {
112         Frame vf;
113         vf.owner = this;
114
115         unique_lock<mutex> lock(freelist_mutex);  // Meh.
116         if (freelist.empty()) {
117                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
118                         frame_size);
119         } else {
120                 vf.data = freelist.top().release();
121                 vf.size = frame_size;
122                 freelist.pop();  // Meh.
123         }
124         return vf;
125 }
126
127 void MallocFrameAllocator::release_frame(Frame frame)
128 {
129         if (frame.overflow > 0) {
130                 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
131         }
132         unique_lock<mutex> lock(freelist_mutex);
133         freelist.push(unique_ptr<uint8_t[]>(frame.data));
134 }
135
// Compares two 16-bit counters that wrap around at 0x10000.
// Returns true if <b> lies strictly ahead of <a> by fewer than 0x8000 steps
// (counting forward modulo 0x10000); returns false if they are equal or if
// <b> is 0x8000 or more steps ahead.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Forward distance from a to b, reduced modulo 2^16.
        const uint16_t forward_distance = static_cast<uint16_t>(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
147
148 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
149 {
150         unique_lock<mutex> lock(queue_lock);
151         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
152                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
153                         q->back().timecode, timecode);
154                 frame.owner->release_frame(frame);
155                 return;
156         }
157
158         QueuedFrame qf;
159         qf.format = format;
160         qf.timecode = timecode;
161         qf.frame = frame;
162         q->push_back(move(qf));
163         queues_not_empty.notify_one();  // might be spurious
164 }
165
166 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
167 {
168         FILE *fp = fopen(filename, "wb");
169         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
170                 printf("short write!\n");
171         }
172         fclose(fp);
173 }
174
175 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
176 {
177         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
178 }
179
180 void BMUSBCapture::dequeue_thread_func()
181 {
182         if (has_dequeue_callbacks) {
183                 dequeue_init_callback();
184         }
185         while (!dequeue_thread_should_quit) {
186                 unique_lock<mutex> lock(queue_lock);
187                 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
188
189                 if (dequeue_thread_should_quit) break;
190
191                 uint16_t video_timecode = pending_video_frames.front().timecode;
192                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
193                 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
194                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
195                                 video_timecode);
196                         QueuedFrame video_frame = pending_video_frames.front();
197                         pending_video_frames.pop_front();
198                         lock.unlock();
199                         video_frame_allocator->release_frame(video_frame.frame);
200                 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
201                         printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
202                                 audio_timecode);
203                         QueuedFrame audio_frame = pending_audio_frames.front();
204                         pending_audio_frames.pop_front();
205                         lock.unlock();
206                         frame_callback(audio_timecode,
207                                        FrameAllocator::Frame(), 0, 0x0000,
208                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
209                 } else {
210                         QueuedFrame video_frame = pending_video_frames.front();
211                         QueuedFrame audio_frame = pending_audio_frames.front();
212                         pending_audio_frames.pop_front();
213                         pending_video_frames.pop_front();
214                         lock.unlock();
215
216 #if 0
217                         char filename[255];
218                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
219                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
220                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
221 #endif
222
223                         frame_callback(video_timecode,
224                                        video_frame.frame, HEADER_SIZE, video_frame.format,
225                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
226                 }
227         }
228         if (has_dequeue_callbacks) {
229                 dequeue_cleanup_callback();
230         }
231 }
232
233 void BMUSBCapture::start_new_frame(const uint8_t *start)
234 {
235         uint16_t format = (start[3] << 8) | start[2];
236         uint16_t timecode = (start[1] << 8) | start[0];
237
238         if (current_video_frame.len > 0) {
239                 // If format is 0x0800 (no signal), add a fake (empty) audio
240                 // frame to get it out of the queue.
241                 // TODO: Figure out if there are other formats that come with
242                 // no audio, and treat them the same.
243                 if (format == 0x0800) {
244                         FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
245                         if (fake_audio_frame.data == nullptr) {
246                                 // Oh well, it's just a no-signal frame anyway.
247                                 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
248                                 current_video_frame.owner->release_frame(current_video_frame);
249                                 current_video_frame = video_frame_allocator->alloc_frame();
250                                 return;
251                         }
252                         queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
253                 }
254                 //dump_frame();
255                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
256
257                 // Update the assumed frame width. We might be one frame too late on format changes,
258                 // but it's much better than asking the user to choose manually.
259                 unsigned width, height, extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den;
260                 bool interlaced;
261                 if (decode_video_format(format, &width, &height, &extra_lines_top, &extra_lines_bottom,
262                                         &frame_rate_nom, &frame_rate_den, &interlaced)) {
263                         assumed_frame_width = width;
264                 }
265         }
266         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
267         //      format, timecode,
268         //      //start[7], start[6], start[5], start[4],
269         //      read_current_frame, FRAME_SIZE);
270
271         current_video_frame = video_frame_allocator->alloc_frame();
272         //if (current_video_frame.data == nullptr) {
273         //      read_current_frame = -1;
274         //} else {
275         //      read_current_frame = 0;
276         //}
277 }
278
279 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
280 {
281         uint16_t format = (start[3] << 8) | start[2];
282         uint16_t timecode = (start[1] << 8) | start[0];
283         if (current_audio_frame.len > 0) {
284                 //dump_audio_block();
285                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
286         }
287         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
288         //      format, timecode, read_current_audio_block);
289         current_audio_frame = audio_frame_allocator->alloc_frame();
290 }
291
292 #if 0
293 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
294 {
295         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
296         for (unsigned j = 0; j < pack->actual_length; j++) {
297         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
298                 printf("%02x", xfr->buffer[j + offset]);
299                 if ((j % 16) == 15)
300                         printf("\n");
301                 else if ((j % 8) == 7)
302                         printf("  ");
303                 else
304                         printf(" ");
305         }
306 }
307 #endif
308
// De-interleaves <n> bytes from <src>: bytes at even positions go to <dest1>,
// bytes at odd positions go to <dest2>. <n> must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        for (size_t consumed = 0; consumed < n; consumed += 2) {
                const size_t pair = consumed / 2;
                dest1[pair] = src[consumed];
                dest2[pair] = src[consumed + 1];
        }
}
320
321 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
322 {
323         if (current_frame->data == nullptr ||
324             current_frame->len > current_frame->size ||
325             start == end) {
326                 return;
327         }
328
329         int bytes = end - start;
330         if (current_frame->len + bytes > current_frame->size) {
331                 current_frame->overflow = current_frame->len + bytes - current_frame->size;
332                 current_frame->len = current_frame->size;
333                 if (current_frame->overflow > 1048576) {
334                         printf("%d bytes overflow after last %s frame\n",
335                                 int(current_frame->overflow), frame_type_name);
336                         current_frame->overflow = 0;
337                 }
338                 //dump_frame();
339         } else {
340                 if (current_frame->interleaved) {
341                         uint8_t *data = current_frame->data + current_frame->len / 2;
342                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
343                         if (current_frame->len % 2 == 1) {
344                                 ++data;
345                                 swap(data, data2);
346                         }
347                         if (bytes % 2 == 1) {
348                                 *data++ = *start++;
349                                 swap(data, data2);
350                                 ++current_frame->len;
351                                 --bytes;
352                         }
353                         memcpy_interleaved(data, data2, start, bytes);
354                         current_frame->len += bytes;
355                 } else {
356                         memcpy(current_frame->data + current_frame->len, start, bytes);
357                         current_frame->len += bytes;
358                 }
359         }
360 }
361
362 #ifdef __SSE4_1__
363
364 #if 0
365 void avx2_dump(const char *name, __m256i n)
366 {
367         printf("%-10s:", name);
368         printf(" %02x", _mm256_extract_epi8(n, 0));
369         printf(" %02x", _mm256_extract_epi8(n, 1));
370         printf(" %02x", _mm256_extract_epi8(n, 2));
371         printf(" %02x", _mm256_extract_epi8(n, 3));
372         printf(" %02x", _mm256_extract_epi8(n, 4));
373         printf(" %02x", _mm256_extract_epi8(n, 5));
374         printf(" %02x", _mm256_extract_epi8(n, 6));
375         printf(" %02x", _mm256_extract_epi8(n, 7));
376         printf(" ");
377         printf(" %02x", _mm256_extract_epi8(n, 8));
378         printf(" %02x", _mm256_extract_epi8(n, 9));
379         printf(" %02x", _mm256_extract_epi8(n, 10));
380         printf(" %02x", _mm256_extract_epi8(n, 11));
381         printf(" %02x", _mm256_extract_epi8(n, 12));
382         printf(" %02x", _mm256_extract_epi8(n, 13));
383         printf(" %02x", _mm256_extract_epi8(n, 14));
384         printf(" %02x", _mm256_extract_epi8(n, 15));
385         printf(" ");
386         printf(" %02x", _mm256_extract_epi8(n, 16));
387         printf(" %02x", _mm256_extract_epi8(n, 17));
388         printf(" %02x", _mm256_extract_epi8(n, 18));
389         printf(" %02x", _mm256_extract_epi8(n, 19));
390         printf(" %02x", _mm256_extract_epi8(n, 20));
391         printf(" %02x", _mm256_extract_epi8(n, 21));
392         printf(" %02x", _mm256_extract_epi8(n, 22));
393         printf(" %02x", _mm256_extract_epi8(n, 23));
394         printf(" ");
395         printf(" %02x", _mm256_extract_epi8(n, 24));
396         printf(" %02x", _mm256_extract_epi8(n, 25));
397         printf(" %02x", _mm256_extract_epi8(n, 26));
398         printf(" %02x", _mm256_extract_epi8(n, 27));
399         printf(" %02x", _mm256_extract_epi8(n, 28));
400         printf(" %02x", _mm256_extract_epi8(n, 29));
401         printf(" %02x", _mm256_extract_epi8(n, 30));
402         printf(" %02x", _mm256_extract_epi8(n, 31));
403         printf("\n");
404 }
405 #endif
406
407 // Does a memcpy and memchr in one to reduce processing time.
408 // Note that the benefit is somewhat limited if your L3 cache is small,
409 // as you'll (unfortunately) spend most of the time loading the data
410 // from main memory.
411 //
412 // Complicated cases are left to the slow path; it basically stops copying
413 // up until the first instance of "sync_char" (usually a bit before, actually).
414 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
415 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame - frame being filled; its data/data2 buffers and len are updated.
//   start, limit  - input byte range [start, limit) to consume from.
//   sync_char     - byte value that stops the copy when it is seen.
//
// Returns a pointer to the first input byte that has NOT been accounted for in
// current_frame->len; the caller processes everything from there on. Returns
// <start> unchanged when nothing was done.
416 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
417 {
            // Bail out if there is no buffer, the frame is already over-full,
            // or there is no input.
418         if (current_frame->data == nullptr ||
419             current_frame->len > current_frame->size ||
420             start == limit) {
421                 return start;
422         }
423         size_t orig_bytes = limit - start;
424         if (orig_bytes < 128) {
425                 // Don't bother.
426                 return start;
427         }
428
429         // Don't read more bytes than we can write.
430         limit = min(limit, start + (current_frame->size - current_frame->len));
431
432         // Align end to 32 bytes.
            // (This rounds down, so up to 31 trailing bytes are left for the caller.)
433         limit = (const uint8_t *)(intptr_t(limit) & ~31);
434
435         if (start >= limit) {
436                 return start;
437         }
438
439         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
440         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
441         if (aligned_start != start) {
                    // If the sync byte shows up in this unaligned prefix, copy only
                    // the bytes before it and hand the rest back to the caller.
442                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
443                 if (sync_start == nullptr) {
444                         add_to_frame(current_frame, "", start, aligned_start);
445                 } else {
446                         add_to_frame(current_frame, "", start, sync_start);
447                         return sync_start;
448                 }
449         }
450
451         // Make the length a multiple of 64.
            // (The AVX2 interleaved loop below consumes two 32-byte vectors per iteration.)
452         if (current_frame->interleaved) {
453                 if (((limit - aligned_start) % 64) != 0) {
454                         limit -= 32;
455                 }
456                 assert(((limit - aligned_start) % 64) == 0);
457         }
458
459 #if __AVX2__
460         const __m256i needle = _mm256_set1_epi8(sync_char);
461
462         const __restrict __m256i *in = (const __m256i *)aligned_start;
463         if (current_frame->interleaved) {
                    // out1 receives the next input byte, out2 the one after it; which
                    // buffer that is depends on whether len is currently even or odd.
464                 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
465                 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
466                 if (current_frame->len % 2 == 1) {
467                         swap(out1, out2);
468                 }
469
                    // Within each 128-bit lane: even-indexed bytes to the low half,
                    // odd-indexed bytes to the high half.
470                 __m256i shuffle_cw = _mm256_set_epi8(
471                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
472                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
473                 while (in < (const __m256i *)limit) {
474                         // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
475                         __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
476                         __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp
477
478                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
479                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
480                         __m256i found = _mm256_or_si256(found1, found2);
481
482                         data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
483                         data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
484                 
485                         data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
486                         data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop
487
488                         __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
489                         __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
490
491                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
492                         _mm256_storeu_si256(out2, hi);
493
                            // Sync byte somewhere in these 64 bytes: stop before advancing,
                            // so they are not counted in len (even though they were stored)
                            // and the returned pointer still points at them.
494                         if (!_mm256_testz_si256(found, found)) {
495                                 break;
496                         }
497
498                         in += 2;
499                         ++out1;
500                         ++out2;
501                 }
                    // Count only the input bytes the vector loop fully consumed.
502                 current_frame->len += (uint8_t *)in - aligned_start;
503         } else {
504                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
505                 while (in < (const __m256i *)limit) {
506                         __m256i data = _mm256_load_si256(in);
507                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
508                         __m256i found = _mm256_cmpeq_epi8(data, needle);
                            // As above: a vector containing the sync byte is stored but not counted.
509                         if (!_mm256_testz_si256(found, found)) {
510                                 break;
511                         }
512
513                         ++in;
514                         ++out;
515                 }
                    // New length is derived from how far the output pointer advanced.
516                 current_frame->len = (uint8_t *)out - current_frame->data;
517         }
518 #else
            // Non-AVX2 variant: same scheme using 16-byte vectors.
519         const __m128i needle = _mm_set1_epi8(sync_char);
520
521         const __m128i *in = (const __m128i *)aligned_start;
522         if (current_frame->interleaved) {
523                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
524                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
525                 if (current_frame->len % 2 == 1) {
526                         swap(out1, out2);
527                 }
528
529                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
530                 while (in < (const __m128i *)limit) {
531                         __m128i data1 = _mm_load_si128(in);
532                         __m128i data2 = _mm_load_si128(in + 1);
                            // Split every 16-bit lane into its low byte (masked) and its
                            // high byte (shifted down), then pack each set into 16 bytes.
533                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
534                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
535                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
536                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
537                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
538                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
539                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
540                         _mm_storeu_si128(out2, hi);
541                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
542                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
                            // Sync byte in either vector: stop without counting these 32 bytes.
543                         if (!_mm_testz_si128(found1, found1) ||
544                             !_mm_testz_si128(found2, found2)) {
545                                 break;
546                         }
547
548                         in += 2;
549                         ++out1;
550                         ++out2;
551                 }
552                 current_frame->len += (uint8_t *)in - aligned_start;
553         } else {
554                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
555                 while (in < (const __m128i *)limit) {
556                         __m128i data = _mm_load_si128(in);
557                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
558                         __m128i found = _mm_cmpeq_epi8(data, needle);
559                         if (!_mm_testz_si128(found, found)) {
560                                 break;
561                         }
562
563                         ++in;
564                         ++out;
565                 }
566                 current_frame->len = (uint8_t *)out - current_frame->data;
567         }
568 #endif
569
570         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
571
            // First input byte the vector loop did not consume.
572         return (const uint8_t *)in;
573 }
574 #endif
575
576 void decode_packs(const libusb_transfer *xfr,
577                   const char *sync_pattern,
578                   int sync_length,
579                   FrameAllocator::Frame *current_frame,
580                   const char *frame_type_name,
581                   function<void(const uint8_t *start)> start_callback)
582 {
583         int offset = 0;
584         for (int i = 0; i < xfr->num_iso_packets; i++) {
585                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
586
587                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
588                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
589                         continue;
590 //exit(5);
591                 }
592
593                 const uint8_t *start = xfr->buffer + offset;
594                 const uint8_t *limit = start + pack->actual_length;
595                 while (start < limit) {  // Usually runs only one iteration.
596 #ifdef __SSE4_1__
597                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
598                         if (start == limit) break;
599                         assert(start < limit);
600 #endif
601
602                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
603                         if (start_next_frame == nullptr) {
604                                 // add the rest of the buffer
605                                 add_to_frame(current_frame, frame_type_name, start, limit);
606                                 break;
607                         } else {
608                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
609                                 start = start_next_frame + sync_length;  // skip sync
610                                 start_callback(start);
611                         }
612                 }
613 #if 0
614                 dump_pack(xfr, offset, pack);
615 #endif
616                 offset += pack->length;
617         }
618 }
619
620 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
621 {
622         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
623                 fprintf(stderr, "transfer status %d\n", xfr->status);
624                 libusb_free_transfer(xfr);
625                 exit(3);
626         }
627
628         assert(xfr->user_data != nullptr);
629         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
630
631         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
632                 if (xfr->endpoint == 0x84) {
633                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
634                 } else {
635                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
636
637                         // Update the transfer with the new assumed width, if we're in the process of changing formats.
638                         change_xfer_size_for_width(usb->assumed_frame_width, xfr);
639                 }
640         }
641         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
642                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
643                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
644 #if 0
645                 if (setup->wIndex == 44) {
646                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
647                 } else {
648                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
649                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
650                 }
651 #else
652                 memcpy(usb->register_file + usb->current_register, buf, 4);
653                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
654                 if (usb->current_register == 0) {
655                         // read through all of them
656                         printf("register dump:");
657                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
658                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
659                         }
660                         printf("\n");
661                 }
662                 libusb_fill_control_setup(xfr->buffer,
663                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
664                         /*index=*/usb->current_register, /*length=*/4);
665 #endif
666         }
667
668 #if 0
669         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
670         for (i = 0; i < xfr->actual_length; i++) {
671                 printf("%02x", xfr->buffer[i]);
672                 if (i % 16)
673                         printf("\n");
674                 else if (i % 8)
675                         printf("  ");
676                 else
677                         printf(" ");
678         }
679 #endif
680
681         int rc = libusb_submit_transfer(xfr);
682         if (rc < 0) {
683                 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
684                 exit(1);
685         }
686 }
687
688 void BMUSBCapture::usb_thread_func()
689 {
690         sched_param param;
691         memset(&param, 0, sizeof(param));
692         param.sched_priority = 1;
693         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
694                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
695         }
696         while (!should_quit) {
697                 int rc = libusb_handle_events(nullptr);
698                 if (rc != LIBUSB_SUCCESS)
699                         break;
700         }
701 }
702
703 struct USBCardDevice {
704         uint16_t product;
705         uint8_t bus, port;
706         libusb_device *device;
707 };
708
709 libusb_device_handle *open_card(int card_index)
710 {       
711         libusb_device **devices;
712         ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
713         if (num_devices == -1) {
714                 fprintf(stderr, "Error finding USB devices\n");
715                 exit(1);
716         }
717         vector<USBCardDevice> found_cards;
718         for (ssize_t i = 0; i < num_devices; ++i) {
719                 libusb_device_descriptor desc;
720                 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
721                         fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
722                         exit(1);
723                 }
724
725                 uint8_t bus = libusb_get_bus_number(devices[i]);
726                 uint8_t port = libusb_get_port_number(devices[i]);
727
728                 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
729                     !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
730                         libusb_unref_device(devices[i]);
731                         continue;
732                 }
733
734                 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
735         }
736         libusb_free_device_list(devices, 0);
737
738         // Sort the devices to get a consistent ordering.
739         sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
740                 if (a.product != b.product)
741                         return a.product < b.product;
742                 if (a.bus != b.bus)
743                         return a.bus < b.bus;
744                 return a.port < b.port;
745         });
746
747         for (size_t i = 0; i < found_cards.size(); ++i) {
748                 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
749                 if (found_cards[i].product == 0xbd3b) {
750                         fprintf(stderr, "Intensity Shuttle\n");
751                 } else if (found_cards[i].product == 0xbd4f) {
752                         fprintf(stderr, "UltraStudio SDI\n");
753                 } else {
754                         assert(false);
755                 }
756         }
757
758         if (size_t(card_index) >= found_cards.size()) {
759                 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
760                 exit(1);
761         }
762
763         libusb_device_handle *devh;
764         int rc = libusb_open(found_cards[card_index].device, &devh);
765         if (rc < 0) {
766                 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
767                 exit(1);
768         }
769
770         for (size_t i = 0; i < found_cards.size(); ++i) {
771                 libusb_unref_device(found_cards[i].device);
772         }
773
774         return devh;
775 }
776
777 void BMUSBCapture::configure_card()
778 {
779         if (video_frame_allocator == nullptr) {
780                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));  // FIXME: leak.
781         }
782         if (audio_frame_allocator == nullptr) {
783                 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));  // FIXME: leak.
784         }
785         dequeue_thread_should_quit = false;
786         dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
787
788         int rc;
789         struct libusb_transfer *xfr;
790
791         rc = libusb_init(nullptr);
792         if (rc < 0) {
793                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
794                 exit(1);
795         }
796
797         libusb_device_handle *devh = open_card(card_index);
798         if (!devh) {
799                 fprintf(stderr, "Error finding USB device\n");
800                 exit(1);
801         }
802
803         libusb_config_descriptor *config;
804         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
805         if (rc < 0) {
806                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
807                 exit(1);
808         }
809
810 #if 0
811         printf("%d interface\n", config->bNumInterfaces);
812         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
813                 printf("  interface %d\n", interface_number);
814                 const libusb_interface *interface = &config->interface[interface_number];
815                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
816                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
817                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
818                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
819                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
820                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
821                         }
822                 }
823         }
824 #endif
825
826         rc = libusb_set_configuration(devh, /*configuration=*/1);
827         if (rc < 0) {
828                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
829                 exit(1);
830         }
831
832         rc = libusb_claim_interface(devh, 0);
833         if (rc < 0) {
834                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
835                 exit(1);
836         }
837
838         // Alternate setting 1 is output, alternate setting 2 is input.
839         // Card is reset when switching alternates, so the driver uses
840         // this “double switch” when it wants to reset.
841         //
842         // There's also alternate settings 3 and 4, which seem to be
843         // like 1 and 2 except they advertise less bandwidth needed.
844         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
845         if (rc < 0) {
846                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
847                 exit(1);
848         }
849         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
850         if (rc < 0) {
851                 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
852                 exit(1);
853         }
854 #if 0
855         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
856         if (rc < 0) {
857                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
858                 exit(1);
859         }
860 #endif
861
862 #if 0
863         rc = libusb_claim_interface(devh, 3);
864         if (rc < 0) {
865                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
866                 exit(1);
867         }
868 #endif
869
870         // theories:
871         //   44 is some kind of timer register (first 16 bits count upwards)
872         //   24 is some sort of watchdog?
873         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
874         //      (or will go to 0x73c60010?), also seen 0x73c60100
875         //   12 also changes all the time, unclear why  
876         //   16 seems to be autodetected mode somehow
877         //      --    this is e00115e0 after reset?
878         //                    ed0115e0 after mode change [to output?]
879         //                    2d0015e0 after more mode change [to input]
880         //                    ed0115e0 after more mode change
881         //                    2d0015e0 after more mode change
882         //
883         //                    390115e0 seems to indicate we have signal
884         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
885         //
886         //                    200015e0 on startup
887         //         changes to 250115e0 when we sync to the signal
888         //
889         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
890         //
891         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
892         //
893         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
894         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
895         //
896         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
897         //    perhaps some of them are related to analog output?
898         //
899         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
900         //    but the driver sets it to 0x8036802a at some point.
901         //
902         //    all of this is on request 214/215. other requests (192, 219,
903         //    222, 223, 224) are used for firmware upgrade. Probably best to
904         //    stay out of it unless you know what you're doing.
905         //
906         //
907         // register 16:
908         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
909         //
910         // theories:
911         //   0x01 - stable signal
912         //   0x04 - deep color
913         //   0x08 - unknown (audio??)
914         //   0x20 - 720p??
915         //   0x30 - 576p??
916
917         struct ctrl {
918                 int endpoint;
919                 int request;
920                 int index;
921                 uint32_t data;
922         };
923         static const ctrl ctrls[] = {
924                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
925                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
926
927                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
928                 // capture (v210).
929                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
930                 // 0x10000000 = analog audio instead of embedded audio, it seems
931                 // 0x3a000000 = component video? (analog audio)
932                 // 0x3c000000 = composite video? (analog audio)
933                 // 0x3e000000 = s-video? (analog audio)
934                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
935                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
936                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
937                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
938                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
939         };
940
941         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
942                 uint32_t flipped = htonl(ctrls[req].data);
943                 static uint8_t value[4];
944                 memcpy(value, &flipped, sizeof(flipped));
945                 int size = sizeof(value);
946                 //if (ctrls[req].request == 215) size = 0;
947                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
948                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
949                 if (rc < 0) {
950                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
951                         exit(1);
952                 }
953
954                 if (ctrls[req].index == 16 && rc == 4) {
955                         printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
956                 }
957
958 #if 0
959                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
960                 for (int i = 0; i < rc; ++i) {
961                         printf("%02x", value[i]);
962                 }
963                 printf("\n");
964 #endif
965         }
966
967 #if 0
968         // DEBUG
969         for ( ;; ) {
970                 static int my_index = 0;
971                 static uint8_t value[4];
972                 int size = sizeof(value);
973                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
974                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
975                 if (rc < 0) {
976                         fprintf(stderr, "Error on control\n");
977                         exit(1);
978                 }
979                 printf("rc=%d index=%d: 0x", rc, my_index);
980                 for (int i = 0; i < rc; ++i) {
981                         printf("%02x", value[i]);
982                 }
983                 printf("\n");
984         }
985 #endif
986
987 #if 0
988         // set up an asynchronous transfer of the timer register
989         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
990         static int completed = 0;
991
992         xfr = libusb_alloc_transfer(0);
993         libusb_fill_control_setup(cmdbuf,
994             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
995                 /*index=*/44, /*length=*/4);
996         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
997         xfr->user_data = this;
998         libusb_submit_transfer(xfr);
999
1000         // set up an asynchronous transfer of register 24
1001         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1002         static int completed2 = 0;
1003
1004         xfr = libusb_alloc_transfer(0);
1005         libusb_fill_control_setup(cmdbuf2,
1006             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1007                 /*index=*/24, /*length=*/4);
1008         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1009         xfr->user_data = this;
1010         libusb_submit_transfer(xfr);
1011 #endif
1012
1013         // set up an asynchronous transfer of the register dump
1014         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1015         static int completed3 = 0;
1016
1017         xfr = libusb_alloc_transfer(0);
1018         libusb_fill_control_setup(cmdbuf3,
1019             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1020                 /*index=*/current_register, /*length=*/4);
1021         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1022         xfr->user_data = this;
1023         //libusb_submit_transfer(xfr);
1024
1025         audiofp = fopen("audio.raw", "wb");
1026
1027         // set up isochronous transfers for audio and video
1028         for (int e = 3; e <= 4; ++e) {
1029                 //int num_transfers = (e == 3) ? 6 : 6;
1030                 int num_transfers = 10;
1031                 for (int i = 0; i < num_transfers; ++i) {
1032                         size_t buf_size;
1033                         int num_iso_pack, size;
1034                         if (e == 3) {
1035                                 // Allocate for minimum width (because that will give us the most
1036                                 // number of packets, so we don't need to reallocated, but we'll
1037                                 // default to 720p for the first frame.
1038                                 size = find_xfer_size_for_width(MIN_WIDTH);
1039                                 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1040                                 buf_size = USB_VIDEO_TRANSFER_SIZE;
1041                         } else {
1042                                 size = 0xc0;
1043                                 num_iso_pack = 80;
1044                                 buf_size = num_iso_pack * size;
1045                         }
1046                         assert(size_t(num_iso_pack * size) <= buf_size);
1047                         uint8_t *buf = new uint8_t[buf_size];
1048
1049                         xfr = libusb_alloc_transfer(num_iso_pack);
1050                         if (!xfr) {
1051                                 fprintf(stderr, "oom\n");
1052                                 exit(1);
1053                         }
1054
1055                         int ep = LIBUSB_ENDPOINT_IN | e;
1056                         libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1057                                 num_iso_pack, cb_xfr, nullptr, 0);
1058                         libusb_set_iso_packet_lengths(xfr, size);
1059                         xfr->user_data = this;
1060
1061                         if (e == 3) {
1062                                 change_xfer_size_for_width(assumed_frame_width, xfr);
1063                         }
1064
1065                         iso_xfrs.push_back(xfr);
1066                 }
1067         }
1068 }
1069
1070 void BMUSBCapture::start_bm_capture()
1071 {
1072         int i = 0;
1073         for (libusb_transfer *xfr : iso_xfrs) {
1074                 int rc = libusb_submit_transfer(xfr);
1075                 ++i;
1076                 if (rc < 0) {
1077                         //printf("num_bytes=%d\n", num_bytes);
1078                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1079                                 xfr->endpoint, i, libusb_error_name(rc));
1080                         exit(1);
1081                 }
1082         }
1083
1084
1085 #if 0
1086         libusb_release_interface(devh, 0);
1087 out:
1088         if (devh)
1089                 libusb_close(devh);
1090         libusb_exit(nullptr);
1091         return rc;
1092 #endif
1093 }
1094
1095 void BMUSBCapture::stop_dequeue_thread()
1096 {
1097         dequeue_thread_should_quit = true;
1098         queues_not_empty.notify_all();
1099         dequeue_thread.join();
1100 }
1101
1102 void BMUSBCapture::start_bm_thread()
1103 {
1104         should_quit = false;
1105         usb_thread = thread(&BMUSBCapture::usb_thread_func);
1106 }
1107
1108 void BMUSBCapture::stop_bm_thread()
1109 {
1110         should_quit = true;
1111         usb_thread.join();
1112 }
1113
// One row in the table of known video formats used by decode_video_format().
// Kept as a plain aggregate so that the table can be brace-initialized.
struct VideoFormatEntry {
        // Format code after the 0xe808 bits have been masked away.
        uint16_t normalized_video_format;
        unsigned width;
        unsigned height;
        unsigned extra_lines_top;
        unsigned extra_lines_bottom;
        // Frame rate as the fraction frame_rate_nom / frame_rate_den.
        unsigned frame_rate_nom;
        unsigned frame_rate_den;
        bool interlaced;
};

// Translates the 16-bit video format code from the card into a resolution,
// the number of extra lines above and below the picture, a frame rate
// (as a fraction) and an interlaced flag. All output parameters are always
// written.
//
// Returns true if the code was recognized (including the special “no signal”
// code 0x0800). Returns false, after printing a message to stdout, if the
// code does not look like a video format at all (outputs are then 0x0 at
// 60 Hz) or is a video format we do not know (outputs are then 720p60).
bool decode_video_format(uint16_t video_format, unsigned *width, unsigned *height, unsigned *extra_lines_top, unsigned *extra_lines_bottom,
                         unsigned *frame_rate_nom, unsigned *frame_rate_den, bool *interlaced)
{
        // Writes one complete result to the output parameters.
        // TODO: Add the extra lines for all formats as we find them.
        const auto report = [=](unsigned w, unsigned h, unsigned top, unsigned bottom,
                                unsigned nom, unsigned den, bool is_interlaced) {
                *width = w;
                *height = h;
                *extra_lines_top = top;
                *extra_lines_bottom = bottom;
                *frame_rate_nom = nom;
                *frame_rate_den = den;
                *interlaced = is_interlaced;
        };

        if (video_format == 0x0800) {
                // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
                // It's a strange thing, but what can you do.
                report(720, 525, 0, 0, 3013, 100, false);
                return true;
        }

        const bool looks_like_video = ((video_format & 0xe800) == 0xe800);
        if (!looks_like_video) {
                printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
                        video_format);
                report(0, 0, 0, 0, 60, 1, false);
                return false;
        }

        // NTSC and PAL differ only in the 0x8 bit, which is masked away before
        // the table lookup below, so they must be matched on the full code first.
        switch (video_format) {
        case 0xe901:
        case 0xe9c1:
        case 0xe801:
                // NTSC (480i59.94, I suppose).
                report(640, 480, 0, 0, 60000, 1001, true);
                return true;
        case 0xe909:
                // PAL (576i50, I suppose).
                report(720, 576, 0, 0, 50, 1, true);
                return true;
        default:
                break;
        }

        // 0x8 seems to be a flag about availability of deep color on the input,
        // except when it's not (e.g. it's the only difference between NTSC
        // and PAL). Rather confusing. But we clear it here nevertheless, because
        // usually it doesn't mean anything.
        const uint16_t normalized_video_format = video_format & ~0xe808;
        static constexpr VideoFormatEntry known_formats[] = {
                { 0x0143, 1280,  720, 25,  5,    50,    1, false },  // 720p50.
                { 0x0103, 1280,  720, 25,  5,    60,    1, false },  // 720p60.
                { 0x0121, 1280,  720, 25,  5, 60000, 1001, false },  // 720p59.94.
                { 0x01c3, 1920, 1080,  0,  0,    30,    1, false },  // 1080p30.
                { 0x0003, 1920, 1080, 20, 25,    30,    1,  true },  // 1080i60.
                { 0x01e1, 1920, 1080,  0,  0, 30000, 1001, false },  // 1080p29.97.
                { 0x0021, 1920, 1080, 20, 25, 30000, 1001,  true },  // 1080i59.94.
                { 0x0063, 1920, 1080,  0,  0,    25,    1, false },  // 1080p25.
                { 0x0043, 1920, 1080,  0,  0,    25,    1,  true },  // 1080i50 (interlaced, 25 frames/s).
                { 0x008e, 1920, 1080,  0,  0,    24,    1, false },  // 1080p24.
                { 0x00a1, 1920, 1080,  0,  0, 24000, 1001, false },  // 1080p23.98.
        };
        const VideoFormatEntry *const table_end = std::end(known_formats);
        const VideoFormatEntry *const match = std::find_if(std::begin(known_formats), table_end,
                [normalized_video_format](const VideoFormatEntry &candidate) {
                        return candidate.normalized_video_format == normalized_video_format;
                });
        if (match != table_end) {
                report(match->width, match->height, match->extra_lines_top, match->extra_lines_bottom,
                       match->frame_rate_nom, match->frame_rate_den, match->interlaced);
                return true;
        }

        printf("Unknown video format 0x%04x. Assuming 720p60.\n", video_format);
        report(1280, 720, 0, 0, 60, 1, false);
        return false;
}