]> git.sesse.net Git - bmusb/blob - bmusb.cpp
48bdcae3c41516815cb31a9d1cafb9a015d28f1c
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <netinet/in.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #ifdef __SSE4_1__
17 #include <immintrin.h>
18 #endif
19 #include "bmusb.h"
20
21 #include <algorithm>
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <functional>
28 #include <memory>
29 #include <mutex>
30 #include <stack>
31 #include <string>
32 #include <thread>
33
34 using namespace std;
35 using namespace std::placeholders;
36
// Smallest frame width accepted when resizing the isochronous packets
// (asserted in change_xfer_size_for_width()).
#define MIN_WIDTH 640
// Number of bytes in front of the picture data in a video frame; handed to
// the frame callback as the video offset and skipped by dump_frame().
#define HEADER_SIZE 44
//#define HEADER_SIZE 0
// Number of bytes in front of the samples in an audio block; handed to
// the frame callback as the audio offset and skipped by dump_audio_block().
#define AUDIO_HEADER_SIZE 4

#define FRAME_SIZE (8 << 20)  // 8 MB.
#define USB_VIDEO_TRANSFER_SIZE (128 << 10)  // 128 kB.
44
45 namespace {
46
// Destination stream used by dump_audio_block() (debugging aid).
FILE *audiofp;

// NOTE(review): presumably the thread running BMUSBCapture::usb_thread_func();
// it is started outside this part of the file -- confirm.
thread usb_thread;
// Polled by usb_thread_func(); once true, its event loop exits.
atomic<bool> should_quit;
51
// Returns the isochronous packet size, in bytes, to use for frames of the
// given width.
//
// Video seems to require isochronous packets scaled with the width;
// seemingly six lines (at two bytes per pixel) is about right, rounded up
// to the required 1 kB multiple.
int find_xfer_size_for_width(int width)
{
        // Note that for 10-bit input, this would have to be scaled up
        // accordingly (by 4/3) before rounding.
        const int six_lines = width * 2 * 6;

        // Round up to the next multiple of 1024 (no-op if already aligned).
        return (six_lines + 1023) & ~1023;
}
66
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
68 {
69         assert(width >= MIN_WIDTH);
70         size_t size = find_xfer_size_for_width(width);
71         int num_iso_pack = xfr->length / size;
72         if (num_iso_pack != xfr->num_iso_packets ||
73             size != xfr->iso_packet_desc[0].length) {
74                 xfr->num_iso_packets = num_iso_pack;
75                 libusb_set_iso_packet_lengths(xfr, size);
76         }
77 }
78
79 }  // namespace
80
81 FrameAllocator::~FrameAllocator() {}
82
83 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
84         : frame_size(frame_size)
85 {
86         for (size_t i = 0; i < num_queued_frames; ++i) {
87                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
88         }
89 }
90
91 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
92 {
93         Frame vf;
94         vf.owner = this;
95
96         unique_lock<mutex> lock(freelist_mutex);  // Meh.
97         if (freelist.empty()) {
98                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
99                         frame_size);
100         } else {
101                 vf.data = freelist.top().release();
102                 vf.size = frame_size;
103                 freelist.pop();  // Meh.
104         }
105         return vf;
106 }
107
108 void MallocFrameAllocator::release_frame(Frame frame)
109 {
110         if (frame.overflow > 0) {
111                 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
112         }
113         unique_lock<mutex> lock(freelist_mutex);
114         freelist.push(unique_ptr<uint8_t[]>(frame.data));
115 }
116
// Compares two 16-bit counters that are allowed to wrap around.
//
// Returns true if b is "ahead of" a, i.e., if going forward from a
// (modulo 65536) reaches b in at least one and fewer than 0x8000 steps.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        const uint16_t forward_distance = static_cast<uint16_t>(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
128
129 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
130 {
131         unique_lock<mutex> lock(queue_lock);
132         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
133                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
134                         q->back().timecode, timecode);
135                 frame.owner->release_frame(frame);
136                 return;
137         }
138
139         QueuedFrame qf;
140         qf.format = format;
141         qf.timecode = timecode;
142         qf.frame = frame;
143         q->push_back(move(qf));
144         queues_not_empty.notify_one();  // might be spurious
145 }
146
147 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
148 {
149         FILE *fp = fopen(filename, "wb");
150         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
151                 printf("short write!\n");
152         }
153         fclose(fp);
154 }
155
156 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
157 {
158         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
159 }
160
161 void BMUSBCapture::dequeue_thread_func()
162 {
163         if (has_dequeue_callbacks) {
164                 dequeue_init_callback();
165         }
166         while (!dequeue_thread_should_quit) {
167                 unique_lock<mutex> lock(queue_lock);
168                 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
169
170                 if (dequeue_thread_should_quit) break;
171
172                 uint16_t video_timecode = pending_video_frames.front().timecode;
173                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
174                 AudioFormat audio_format;
175                 audio_format.bits_per_sample = 24;
176                 audio_format.num_channels = 8;
177                 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
178                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
179                                 video_timecode);
180                         QueuedFrame video_frame = pending_video_frames.front();
181                         pending_video_frames.pop_front();
182                         lock.unlock();
183                         video_frame_allocator->release_frame(video_frame.frame);
184                 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
185                         printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
186                                 audio_timecode);
187                         QueuedFrame audio_frame = pending_audio_frames.front();
188                         pending_audio_frames.pop_front();
189                         lock.unlock();
190                         audio_format.id = audio_frame.format;
191
192                         // Use the video format of the pending frame.
193                         QueuedFrame video_frame = pending_video_frames.front();
194                         VideoFormat video_format;
195                         decode_video_format(video_frame.format, &video_format);
196
197                         frame_callback(audio_timecode,
198                                        FrameAllocator::Frame(), 0, video_format,
199                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
200                 } else {
201                         QueuedFrame video_frame = pending_video_frames.front();
202                         QueuedFrame audio_frame = pending_audio_frames.front();
203                         pending_audio_frames.pop_front();
204                         pending_video_frames.pop_front();
205                         lock.unlock();
206
207 #if 0
208                         char filename[255];
209                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
210                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
211                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
212 #endif
213
214                         VideoFormat video_format;
215                         audio_format.id = audio_frame.format;
216                         if (decode_video_format(video_frame.format, &video_format)) {
217                                 frame_callback(video_timecode,
218                                                video_frame.frame, HEADER_SIZE, video_format,
219                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
220                         } else {
221                                 frame_callback(video_timecode,
222                                                FrameAllocator::Frame(), 0, video_format,
223                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
224                         }
225                 }
226         }
227         if (has_dequeue_callbacks) {
228                 dequeue_cleanup_callback();
229         }
230 }
231
232 void BMUSBCapture::start_new_frame(const uint8_t *start)
233 {
234         uint16_t format = (start[3] << 8) | start[2];
235         uint16_t timecode = (start[1] << 8) | start[0];
236
237         if (current_video_frame.len > 0) {
238                 // If format is 0x0800 (no signal), add a fake (empty) audio
239                 // frame to get it out of the queue.
240                 // TODO: Figure out if there are other formats that come with
241                 // no audio, and treat them the same.
242                 if (format == 0x0800) {
243                         FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
244                         if (fake_audio_frame.data == nullptr) {
245                                 // Oh well, it's just a no-signal frame anyway.
246                                 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
247                                 current_video_frame.owner->release_frame(current_video_frame);
248                                 current_video_frame = video_frame_allocator->alloc_frame();
249                                 return;
250                         }
251                         queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
252                 }
253                 //dump_frame();
254                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
255
256                 // Update the assumed frame width. We might be one frame too late on format changes,
257                 // but it's much better than asking the user to choose manually.
258                 VideoFormat video_format;
259                 if (decode_video_format(format, &video_format)) {
260                         assumed_frame_width = video_format.width;
261                 }
262         }
263         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
264         //      format, timecode,
265         //      //start[7], start[6], start[5], start[4],
266         //      read_current_frame, FRAME_SIZE);
267
268         current_video_frame = video_frame_allocator->alloc_frame();
269         //if (current_video_frame.data == nullptr) {
270         //      read_current_frame = -1;
271         //} else {
272         //      read_current_frame = 0;
273         //}
274 }
275
276 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
277 {
278         uint16_t format = (start[3] << 8) | start[2];
279         uint16_t timecode = (start[1] << 8) | start[0];
280         if (current_audio_frame.len > 0) {
281                 //dump_audio_block();
282                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
283         }
284         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
285         //      format, timecode, read_current_audio_block);
286         current_audio_frame = audio_frame_allocator->alloc_frame();
287 }
288
#if 0
// Debugging aid (compiled out): hex-dumps the bytes actually received in
// one isochronous packet, 16 per line, with an extra gap after the
// eighth byte of each line. "offset" is the packet's position in the
// transfer buffer.
static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
{
        const unsigned char *bytes = xfr->buffer + offset;
        for (unsigned pos = 0; pos < pack->actual_length; ++pos) {
                printf("%02x", bytes[pos]);
                switch (pos % 16) {
                case 15:
                        printf("\n");
                        break;
                case 7:
                        printf("  ");
                        break;
                default:
                        printf(" ");
                        break;
                }
        }
}
#endif
305
// De-interleaves n bytes from src: bytes at even positions go to dest1,
// bytes at odd positions go to dest2 (n/2 bytes each). n must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        const uint8_t *const src_end = src + n;
        size_t pair = 0;
        while (src < src_end) {
                dest1[pair] = src[0];
                dest2[pair] = src[1];
                src += 2;
                ++pair;
        }
}
317
318 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
319 {
320         if (current_frame->data == nullptr ||
321             current_frame->len > current_frame->size ||
322             start == end) {
323                 return;
324         }
325
326         int bytes = end - start;
327         if (current_frame->len + bytes > current_frame->size) {
328                 current_frame->overflow = current_frame->len + bytes - current_frame->size;
329                 current_frame->len = current_frame->size;
330                 if (current_frame->overflow > 1048576) {
331                         printf("%d bytes overflow after last %s frame\n",
332                                 int(current_frame->overflow), frame_type_name);
333                         current_frame->overflow = 0;
334                 }
335                 //dump_frame();
336         } else {
337                 if (current_frame->interleaved) {
338                         uint8_t *data = current_frame->data + current_frame->len / 2;
339                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
340                         if (current_frame->len % 2 == 1) {
341                                 ++data;
342                                 swap(data, data2);
343                         }
344                         if (bytes % 2 == 1) {
345                                 *data++ = *start++;
346                                 swap(data, data2);
347                                 ++current_frame->len;
348                                 --bytes;
349                         }
350                         memcpy_interleaved(data, data2, start, bytes);
351                         current_frame->len += bytes;
352                 } else {
353                         memcpy(current_frame->data + current_frame->len, start, bytes);
354                         current_frame->len += bytes;
355                 }
356         }
357 }
358
359 #ifdef __SSE4_1__
360
#if 0
// Debugging aid (compiled out): prints a label followed by the 32 bytes
// of an AVX2 register in hex, byte 0 first, in four groups of eight.
void avx2_dump(const char *name, __m256i n)
{
        uint8_t bytes[32];
        _mm256_storeu_si256((__m256i *)bytes, n);

        printf("%-10s:", name);
        for (int i = 0; i < 32; ++i) {
                if (i > 0 && i % 8 == 0) {
                        printf(" ");
                }
                printf(" %02x", bytes[i]);
        }
        printf("\n");
}
#endif
403
// Does a memcpy and memchr in one to reduce processing time.
// Note that the benefit is somewhat limited if your L3 cache is small,
// as you'll (unfortunately) spend most of the time loading the data
// from main memory.
//
// Complicated cases are left to the slow path; it basically stops copying
// up until the first instance of "sync_char" (usually a bit before, actually).
// This is fine, since 0x00 bytes shouldn't really show up in normal picture
// data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame: frame to append to (plain or interleaved).
//   start, limit:  the input bytes, [start, limit).
//   sync_char:     byte value that makes the copy stop.
//
// Returns the position in the input from which the caller must continue
// with the slow path. That is "start" itself if nothing was done (no frame
// buffer, frame already over its size, or fewer than 128 bytes of input),
// the position of sync_char if it was found in the unaligned prefix, or
// otherwise the first input block that was not accounted for in the
// frame's length.
const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
{
        if (current_frame->data == nullptr ||
            current_frame->len > current_frame->size ||
            start == limit) {
                return start;
        }
        size_t orig_bytes = limit - start;
        if (orig_bytes < 128) {
                // Don't bother.
                return start;
        }

        // Don't read more bytes than we can write.
        limit = min(limit, start + (current_frame->size - current_frame->len));

        // Align end to 32 bytes.
        limit = (const uint8_t *)(intptr_t(limit) & ~31);

        if (start >= limit) {
                return start;
        }

        // Process [0,31] bytes, such that start gets aligned to 32 bytes.
        // This prefix goes through the regular add_to_frame(); if it contains
        // sync_char, we copy up to it and hand the rest back to the caller.
        const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
        if (aligned_start != start) {
                const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
                if (sync_start == nullptr) {
                        add_to_frame(current_frame, "", start, aligned_start);
                } else {
                        add_to_frame(current_frame, "", start, sync_start);
                        return sync_start;
                }
        }

        // Make the length a multiple of 64.
        // (The interleaved loops below consume two vectors per iteration.)
        if (current_frame->interleaved) {
                if (((limit - aligned_start) % 64) != 0) {
                        limit -= 32;
                }
                assert(((limit - aligned_start) % 64) == 0);
        }

#if __AVX2__
        const __m256i needle = _mm256_set1_epi8(sync_char);

        const __restrict __m256i *in = (const __m256i *)aligned_start;
        if (current_frame->interleaved) {
                // out1 receives the next incoming byte, out2 the one after it;
                // which buffer that is depends on whether an odd number of
                // bytes has been stored so far.
                __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
                __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                // Within each 128-bit lane: even-indexed bytes to the low half,
                // odd-indexed bytes to the high half.
                __m256i shuffle_cw = _mm256_set_epi8(
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
                while (in < (const __m256i *)limit) {
                        // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
                        __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
                        __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp

                        __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
                        __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
                        __m256i found = _mm256_or_si256(found1, found2);

                        data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
                        data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
                
                        data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
                        data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop

                        __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
                        __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);

                        _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
                        _mm256_storeu_si256(out2, hi);

                        // If sync_char is anywhere in these 64 bytes, stop without
                        // advancing, so that they are not counted in the length.
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
                while (in < (const __m256i *)limit) {
                        __m256i data = _mm256_load_si256(in);
                        _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
                        __m256i found = _mm256_cmpeq_epi8(data, needle);
                        // A block containing sync_char is not counted (out is not advanced).
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#else
        // Same algorithm with 128-bit vectors.
        const __m128i needle = _mm_set1_epi8(sync_char);

        const __m128i *in = (const __m128i *)aligned_start;
        if (current_frame->interleaved) {
                // See the AVX2 version for how out1/out2 are chosen.
                __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
                __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
                while (in < (const __m128i *)limit) {
                        __m128i data1 = _mm_load_si128(in);
                        __m128i data2 = _mm_load_si128(in + 1);
                        // Split each 16-bit word into its low byte (even input
                        // position) and its high byte (odd input position).
                        __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
                        __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
                        __m128i data1_hi = _mm_srli_epi16(data1, 8);
                        __m128i data2_hi = _mm_srli_epi16(data2, 8);
                        __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
                        _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
                        __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
                        _mm_storeu_si128(out2, hi);
                        __m128i found1 = _mm_cmpeq_epi8(data1, needle);
                        __m128i found2 = _mm_cmpeq_epi8(data2, needle);
                        // Stop without advancing if sync_char is in these 32 bytes.
                        if (!_mm_testz_si128(found1, found1) ||
                            !_mm_testz_si128(found2, found2)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
                while (in < (const __m128i *)limit) {
                        __m128i data = _mm_load_si128(in);
                        _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
                        __m128i found = _mm_cmpeq_epi8(data, needle);
                        // A block containing sync_char is not counted (out is not advanced).
                        if (!_mm_testz_si128(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#endif

        //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);

        return (const uint8_t *)in;
}
571 #endif
572
573 void decode_packs(const libusb_transfer *xfr,
574                   const char *sync_pattern,
575                   int sync_length,
576                   FrameAllocator::Frame *current_frame,
577                   const char *frame_type_name,
578                   function<void(const uint8_t *start)> start_callback)
579 {
580         int offset = 0;
581         for (int i = 0; i < xfr->num_iso_packets; i++) {
582                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
583
584                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
585                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
586                         continue;
587 //exit(5);
588                 }
589
590                 const uint8_t *start = xfr->buffer + offset;
591                 const uint8_t *limit = start + pack->actual_length;
592                 while (start < limit) {  // Usually runs only one iteration.
593 #ifdef __SSE4_1__
594                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
595                         if (start == limit) break;
596                         assert(start < limit);
597 #endif
598
599                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
600                         if (start_next_frame == nullptr) {
601                                 // add the rest of the buffer
602                                 add_to_frame(current_frame, frame_type_name, start, limit);
603                                 break;
604                         } else {
605                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
606                                 start = start_next_frame + sync_length;  // skip sync
607                                 start_callback(start);
608                         }
609                 }
610 #if 0
611                 dump_pack(xfr, offset, pack);
612 #endif
613                 offset += pack->length;
614         }
615 }
616
617 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
618 {
619         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
620                 fprintf(stderr, "transfer status %d\n", xfr->status);
621                 libusb_free_transfer(xfr);
622                 exit(3);
623         }
624
625         assert(xfr->user_data != nullptr);
626         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
627
628         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
629                 if (xfr->endpoint == 0x84) {
630                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
631                 } else {
632                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
633
634                         // Update the transfer with the new assumed width, if we're in the process of changing formats.
635                         change_xfer_size_for_width(usb->assumed_frame_width, xfr);
636                 }
637         }
638         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
639                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
640                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
641 #if 0
642                 if (setup->wIndex == 44) {
643                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
644                 } else {
645                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
646                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
647                 }
648 #else
649                 memcpy(usb->register_file + usb->current_register, buf, 4);
650                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
651                 if (usb->current_register == 0) {
652                         // read through all of them
653                         printf("register dump:");
654                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
655                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
656                         }
657                         printf("\n");
658                 }
659                 libusb_fill_control_setup(xfr->buffer,
660                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
661                         /*index=*/usb->current_register, /*length=*/4);
662 #endif
663         }
664
665 #if 0
666         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
667         for (i = 0; i < xfr->actual_length; i++) {
668                 printf("%02x", xfr->buffer[i]);
669                 if (i % 16)
670                         printf("\n");
671                 else if (i % 8)
672                         printf("  ");
673                 else
674                         printf(" ");
675         }
676 #endif
677
678         int rc = libusb_submit_transfer(xfr);
679         if (rc < 0) {
680                 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
681                 exit(1);
682         }
683 }
684
685 void BMUSBCapture::usb_thread_func()
686 {
687         sched_param param;
688         memset(&param, 0, sizeof(param));
689         param.sched_priority = 1;
690         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
691                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
692         }
693         while (!should_quit) {
694                 int rc = libusb_handle_events(nullptr);
695                 if (rc != LIBUSB_SUCCESS)
696                         break;
697         }
698 }
699
700 struct USBCardDevice {
701         uint16_t product;
702         uint8_t bus, port;
703         libusb_device *device;
704 };
705
706 libusb_device_handle *open_card(int card_index, string *description)
707 {       
708         libusb_device **devices;
709         ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
710         if (num_devices == -1) {
711                 fprintf(stderr, "Error finding USB devices\n");
712                 exit(1);
713         }
714         vector<USBCardDevice> found_cards;
715         for (ssize_t i = 0; i < num_devices; ++i) {
716                 libusb_device_descriptor desc;
717                 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
718                         fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
719                         exit(1);
720                 }
721
722                 uint8_t bus = libusb_get_bus_number(devices[i]);
723                 uint8_t port = libusb_get_port_number(devices[i]);
724
725                 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
726                     !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
727                         libusb_unref_device(devices[i]);
728                         continue;
729                 }
730
731                 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
732         }
733         libusb_free_device_list(devices, 0);
734
735         // Sort the devices to get a consistent ordering.
736         sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
737                 if (a.product != b.product)
738                         return a.product < b.product;
739                 if (a.bus != b.bus)
740                         return a.bus < b.bus;
741                 return a.port < b.port;
742         });
743
744         for (size_t i = 0; i < found_cards.size(); ++i) {
745                 const char *product_name = nullptr;
746                 if (found_cards[i].product == 0xbd3b) {
747                         product_name = "Intensity Shuttle";
748                 } else if (found_cards[i].product == 0xbd4f) {
749                         product_name = "UltraStudio SDI";
750                 } else {
751                         assert(false);
752                 }
753
754                 char buf[256];
755                 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u  %s",
756                         int(i), found_cards[i].bus, found_cards[i].port, product_name);
757                 if (i == size_t(card_index)) {
758                         *description = buf;
759                 }
760                 fprintf(stderr, "%s\n", buf);
761         }
762
763         if (size_t(card_index) >= found_cards.size()) {
764                 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
765                 exit(1);
766         }
767
768         libusb_device_handle *devh;
769         int rc = libusb_open(found_cards[card_index].device, &devh);
770         if (rc < 0) {
771                 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
772                 exit(1);
773         }
774
775         for (size_t i = 0; i < found_cards.size(); ++i) {
776                 libusb_unref_device(found_cards[i].device);
777         }
778
779         return devh;
780 }
781
782 void BMUSBCapture::configure_card()
783 {
784         if (video_frame_allocator == nullptr) {
785                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));  // FIXME: leak.
786         }
787         if (audio_frame_allocator == nullptr) {
788                 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));  // FIXME: leak.
789         }
790         dequeue_thread_should_quit = false;
791         dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
792
793         int rc;
794         struct libusb_transfer *xfr;
795
796         rc = libusb_init(nullptr);
797         if (rc < 0) {
798                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
799                 exit(1);
800         }
801
802         devh = open_card(card_index, &description);
803         if (!devh) {
804                 fprintf(stderr, "Error finding USB device\n");
805                 exit(1);
806         }
807
808         libusb_config_descriptor *config;
809         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
810         if (rc < 0) {
811                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
812                 exit(1);
813         }
814
815 #if 0
816         printf("%d interface\n", config->bNumInterfaces);
817         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
818                 printf("  interface %d\n", interface_number);
819                 const libusb_interface *interface = &config->interface[interface_number];
820                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
821                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
822                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
823                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
824                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
825                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
826                         }
827                 }
828         }
829 #endif
830
831         rc = libusb_set_configuration(devh, /*configuration=*/1);
832         if (rc < 0) {
833                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
834                 exit(1);
835         }
836
837         rc = libusb_claim_interface(devh, 0);
838         if (rc < 0) {
839                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
840                 exit(1);
841         }
842
843         // Alternate setting 1 is output, alternate setting 2 is input.
844         // Card is reset when switching alternates, so the driver uses
845         // this “double switch” when it wants to reset.
846         //
847         // There's also alternate settings 3 and 4, which seem to be
848         // like 1 and 2 except they advertise less bandwidth needed.
849         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
850         if (rc < 0) {
851                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
852                 exit(1);
853         }
854         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
855         if (rc < 0) {
856                 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
857                 exit(1);
858         }
859 #if 0
860         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
861         if (rc < 0) {
862                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
863                 exit(1);
864         }
865 #endif
866
867 #if 0
868         rc = libusb_claim_interface(devh, 3);
869         if (rc < 0) {
870                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
871                 exit(1);
872         }
873 #endif
874
875         // theories:
876         //   44 is some kind of timer register (first 16 bits count upwards)
877         //   24 is some sort of watchdog?
878         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
879         //      (or will go to 0x73c60010?), also seen 0x73c60100
880         //   12 also changes all the time, unclear why  
881         //   16 seems to be autodetected mode somehow
882         //      --    this is e00115e0 after reset?
883         //                    ed0115e0 after mode change [to output?]
884         //                    2d0015e0 after more mode change [to input]
885         //                    ed0115e0 after more mode change
886         //                    2d0015e0 after more mode change
887         //
888         //                    390115e0 seems to indicate we have signal
889         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
890         //
891         //                    200015e0 on startup
892         //         changes to 250115e0 when we sync to the signal
893         //
894         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
895         //
896         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
897         //
898         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
899         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
900         //
901         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
902         //    perhaps some of them are related to analog output?
903         //
904         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
905         //    but the driver sets it to 0x8036802a at some point.
906         //
907         //    all of this is on request 214/215. other requests (192, 219,
908         //    222, 223, 224) are used for firmware upgrade. Probably best to
909         //    stay out of it unless you know what you're doing.
910         //
911         //
912         // register 16:
913         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
914         //
915         // theories:
916         //   0x01 - stable signal
917         //   0x04 - deep color
918         //   0x08 - unknown (audio??)
919         //   0x20 - 720p??
920         //   0x30 - 576p??
921
922         update_capture_mode();
923
924         struct ctrl {
925                 int endpoint;
926                 int request;
927                 int index;
928                 uint32_t data;
929         };
930         static const ctrl ctrls[] = {
931                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
932                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
933
934                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
935                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
936                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
937                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
938         };
939
940         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
941                 uint32_t flipped = htonl(ctrls[req].data);
942                 static uint8_t value[4];
943                 memcpy(value, &flipped, sizeof(flipped));
944                 int size = sizeof(value);
945                 //if (ctrls[req].request == 215) size = 0;
946                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
947                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
948                 if (rc < 0) {
949                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
950                         exit(1);
951                 }
952
953                 if (ctrls[req].index == 16 && rc == 4) {
954                         printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
955                 }
956
957 #if 0
958                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
959                 for (int i = 0; i < rc; ++i) {
960                         printf("%02x", value[i]);
961                 }
962                 printf("\n");
963 #endif
964         }
965
966 #if 0
967         // DEBUG
968         for ( ;; ) {
969                 static int my_index = 0;
970                 static uint8_t value[4];
971                 int size = sizeof(value);
972                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
973                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
974                 if (rc < 0) {
975                         fprintf(stderr, "Error on control\n");
976                         exit(1);
977                 }
978                 printf("rc=%d index=%d: 0x", rc, my_index);
979                 for (int i = 0; i < rc; ++i) {
980                         printf("%02x", value[i]);
981                 }
982                 printf("\n");
983         }
984 #endif
985
986 #if 0
987         // set up an asynchronous transfer of the timer register
988         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
989         static int completed = 0;
990
991         xfr = libusb_alloc_transfer(0);
992         libusb_fill_control_setup(cmdbuf,
993             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
994                 /*index=*/44, /*length=*/4);
995         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
996         xfr->user_data = this;
997         libusb_submit_transfer(xfr);
998
999         // set up an asynchronous transfer of register 24
1000         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1001         static int completed2 = 0;
1002
1003         xfr = libusb_alloc_transfer(0);
1004         libusb_fill_control_setup(cmdbuf2,
1005             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1006                 /*index=*/24, /*length=*/4);
1007         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1008         xfr->user_data = this;
1009         libusb_submit_transfer(xfr);
1010 #endif
1011
1012         // set up an asynchronous transfer of the register dump
1013         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1014         static int completed3 = 0;
1015
1016         xfr = libusb_alloc_transfer(0);
1017         libusb_fill_control_setup(cmdbuf3,
1018             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1019                 /*index=*/current_register, /*length=*/4);
1020         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1021         xfr->user_data = this;
1022         //libusb_submit_transfer(xfr);
1023
1024         //audiofp = fopen("audio.raw", "wb");
1025
1026         // set up isochronous transfers for audio and video
1027         for (int e = 3; e <= 4; ++e) {
1028                 //int num_transfers = (e == 3) ? 6 : 6;
1029                 int num_transfers = 10;
1030                 for (int i = 0; i < num_transfers; ++i) {
1031                         size_t buf_size;
1032                         int num_iso_pack, size;
1033                         if (e == 3) {
1034                                 // Allocate for minimum width (because that will give us the most
1035                                 // number of packets, so we don't need to reallocated, but we'll
1036                                 // default to 720p for the first frame.
1037                                 size = find_xfer_size_for_width(MIN_WIDTH);
1038                                 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1039                                 buf_size = USB_VIDEO_TRANSFER_SIZE;
1040                         } else {
1041                                 size = 0xc0;
1042                                 num_iso_pack = 80;
1043                                 buf_size = num_iso_pack * size;
1044                         }
1045                         assert(size_t(num_iso_pack * size) <= buf_size);
1046                         uint8_t *buf = new uint8_t[buf_size];
1047
1048                         xfr = libusb_alloc_transfer(num_iso_pack);
1049                         if (!xfr) {
1050                                 fprintf(stderr, "oom\n");
1051                                 exit(1);
1052                         }
1053
1054                         int ep = LIBUSB_ENDPOINT_IN | e;
1055                         libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1056                                 num_iso_pack, cb_xfr, nullptr, 0);
1057                         libusb_set_iso_packet_lengths(xfr, size);
1058                         xfr->user_data = this;
1059
1060                         if (e == 3) {
1061                                 change_xfer_size_for_width(assumed_frame_width, xfr);
1062                         }
1063
1064                         iso_xfrs.push_back(xfr);
1065                 }
1066         }
1067 }
1068
1069 void BMUSBCapture::start_bm_capture()
1070 {
1071         int i = 0;
1072         for (libusb_transfer *xfr : iso_xfrs) {
1073                 int rc = libusb_submit_transfer(xfr);
1074                 ++i;
1075                 if (rc < 0) {
1076                         //printf("num_bytes=%d\n", num_bytes);
1077                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1078                                 xfr->endpoint, i, libusb_error_name(rc));
1079                         exit(1);
1080                 }
1081         }
1082
1083
1084 #if 0
1085         libusb_release_interface(devh, 0);
1086 out:
1087         if (devh)
1088                 libusb_close(devh);
1089         libusb_exit(nullptr);
1090         return rc;
1091 #endif
1092 }
1093
1094 void BMUSBCapture::stop_dequeue_thread()
1095 {
1096         dequeue_thread_should_quit = true;
1097         queues_not_empty.notify_all();
1098         dequeue_thread.join();
1099 }
1100
1101 void BMUSBCapture::start_bm_thread()
1102 {
1103         should_quit = false;
1104         usb_thread = thread(&BMUSBCapture::usb_thread_func);
1105 }
1106
1107 void BMUSBCapture::stop_bm_thread()
1108 {
1109         should_quit = true;
1110         usb_thread.join();
1111 }
1112
// One row of the lookup table used by decode_video_format(). Rows are written
// as aggregate initializers, so the field order below must not change.
struct VideoFormatEntry {
        // Format word with the flag bits masked off; this is the lookup key.
        uint16_t normalized_video_format;

        // Picture size, and the value copied into second_field_start
        // (zero in every progressive row of the table).
        unsigned width;
        unsigned height;
        unsigned second_field_start;

        // Number of extra lines above and below the picture.
        unsigned extra_lines_top;
        unsigned extra_lines_bottom;

        // Frame rate as the fraction frame_rate_nom / frame_rate_den.
        unsigned frame_rate_nom;
        unsigned frame_rate_den;

        bool interlaced;
};
1120
1121 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1122 {
1123         decoded_video_format->id = video_format;
1124         decoded_video_format->interlaced = false;
1125
1126         // TODO: Add these for all formats as we find them.
1127         decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1128
1129         if (video_format == 0x0800) {
1130                 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1131                 // It's a strange thing, but what can you do.
1132                 decoded_video_format->width = 720;
1133                 decoded_video_format->height = 525;
1134                 decoded_video_format->extra_lines_top = 0;
1135                 decoded_video_format->extra_lines_bottom = 0;
1136                 decoded_video_format->frame_rate_nom = 3013;
1137                 decoded_video_format->frame_rate_den = 100;
1138                 decoded_video_format->has_signal = false;
1139                 return true;
1140         }
1141         if ((video_format & 0xe800) != 0xe800) {
1142                 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1143                         video_format);
1144                 decoded_video_format->width = 0;
1145                 decoded_video_format->height = 0;
1146                 decoded_video_format->extra_lines_top = 0;
1147                 decoded_video_format->extra_lines_bottom = 0;
1148                 decoded_video_format->frame_rate_nom = 60;
1149                 decoded_video_format->frame_rate_den = 1;
1150                 decoded_video_format->has_signal = false;
1151                 return false;
1152         }
1153
1154         decoded_video_format->has_signal = true;
1155
1156         // NTSC (480i59.94, I suppose). A special case, see below.
1157         if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1158                 decoded_video_format->width = 720;
1159                 decoded_video_format->height = 480;
1160                 decoded_video_format->extra_lines_top = 17;
1161                 decoded_video_format->extra_lines_bottom = 28;
1162                 decoded_video_format->frame_rate_nom = 30000;
1163                 decoded_video_format->frame_rate_den = 1001;
1164                 decoded_video_format->second_field_start = 280;
1165                 decoded_video_format->interlaced = true;
1166                 return true;
1167         }
1168
1169         // PAL (576i50, I suppose). A special case, see below.
1170         if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1171                 decoded_video_format->width = 720;
1172                 decoded_video_format->height = 576;
1173                 decoded_video_format->extra_lines_top = 22;
1174                 decoded_video_format->extra_lines_bottom = 27;
1175                 decoded_video_format->frame_rate_nom = 25;
1176                 decoded_video_format->frame_rate_den = 1;
1177                 decoded_video_format->second_field_start = 335;
1178                 decoded_video_format->interlaced = true;
1179                 return true;
1180         }
1181
1182         // 0x8 seems to be a flag about availability of deep color on the input,
1183         // except when it's not (e.g. it's the only difference between NTSC
1184         // and PAL). Rather confusing. But we clear it here nevertheless, because
1185         // usually it doesn't mean anything.
1186         //
1187         // 0x4 is a flag I've only seen from the D4. I don't know what it is.
1188         uint16_t normalized_video_format = video_format & ~0xe80c;
1189         constexpr VideoFormatEntry entries[] = {
1190                 { 0x01f1,  720,  480,   0, 40,  5, 60000, 1001, false },  // 480p59.94 (believed).
1191                 { 0x0131,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50.
1192                 { 0x0011,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50 (5:4).
1193                 { 0x0143, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
1194                 { 0x0103, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1195                 { 0x0125, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1196                 { 0x0121, 1280,  720,   0, 25,  5, 60000, 1001, false },  // 720p59.94.
1197                 { 0x01c3, 1920, 1080,   0,  0,  0,    30,    1, false },  // 1080p30.
1198                 { 0x0003, 1920, 1080, 583, 20, 25,    30,    1,  true },  // 1080i60.
1199                 { 0x01e1, 1920, 1080,   0,  0,  0, 30000, 1001, false },  // 1080p29.97.
1200                 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001,  true },  // 1080i59.94.
1201                 { 0x0063, 1920, 1080,   0,  0,  0,    25,    1, false },  // 1080p25.
1202                 { 0x0043, 1920, 1080,   0,  0,  0,    25,    1,  true },  // 1080p50.
1203                 { 0x008e, 1920, 1080,   0,  0,  0,    24,    1, false },  // 1080p24.
1204                 { 0x00a1, 1920, 1080,   0,  0,  0, 24000, 1001, false },  // 1080p23.98.
1205         };
1206         for (const VideoFormatEntry &entry : entries) {
1207                 if (normalized_video_format == entry.normalized_video_format) {
1208                         decoded_video_format->width = entry.width;
1209                         decoded_video_format->height = entry.height;
1210                         decoded_video_format->second_field_start = entry.second_field_start;
1211                         decoded_video_format->extra_lines_top = entry.extra_lines_top;
1212                         decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1213                         decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1214                         decoded_video_format->frame_rate_den = entry.frame_rate_den;
1215                         decoded_video_format->interlaced = entry.interlaced;
1216                         return true;
1217                 }
1218         }
1219
1220         printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1221         decoded_video_format->width = 1280;
1222         decoded_video_format->height = 720;
1223         decoded_video_format->frame_rate_nom = 60;
1224         decoded_video_format->frame_rate_den = 1;
1225         return false;
1226 }
1227
1228 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1229 {
1230         // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1231         VideoMode auto_mode;
1232         auto_mode.name = "Autodetect";
1233         auto_mode.autodetect = true;
1234         return {{ 0, auto_mode }};
1235 }
1236
1237 uint32_t BMUSBCapture::get_current_video_mode() const
1238 {
1239         return 0;  // Matches get_available_video_modes().
1240 }
1241
1242 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1243 {
1244         assert(video_mode_id == 0);  // Matches get_available_video_modes().
1245 }
1246
1247 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1248 {
1249         return {
1250                 { 0x00000000, "HDMI/SDI" },
1251                 { 0x02000000, "Component" },
1252                 { 0x04000000, "Composite" },
1253                 { 0x06000000, "S-video" }
1254         };
1255 }
1256
1257 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1258 {
1259         assert((video_input_id & ~0x06000000) == 0);
1260         current_video_input = video_input_id;
1261         update_capture_mode();
1262 }
1263
1264 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1265 {
1266         return {
1267                 { 0x00000000, "Embedded" },
1268                 { 0x10000000, "Analog" }
1269         };
1270 }
1271
1272 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1273 {
1274         assert((audio_input_id & ~0x10000000) == 0);
1275         current_audio_input = audio_input_id;
1276         update_capture_mode();
1277 }
1278
1279 void BMUSBCapture::update_capture_mode()
1280 {
1281         // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
1282         // clearing the 0x08000000 bit seems to change the capture format (other source?)
1283         uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
1284
1285         int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1286                 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1287         if (rc < 0) {
1288                 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1289                 exit(1);
1290         }
1291 }