]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Stop leaking memory allocators.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <netinet/in.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #ifdef __SSE4_1__
17 #include <immintrin.h>
18 #endif
19 #include "bmusb.h"
20
21 #include <algorithm>
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <functional>
28 #include <memory>
29 #include <mutex>
30 #include <stack>
31 #include <string>
32 #include <thread>
33
34 using namespace std;
35 using namespace std::placeholders;
36
37 #define MIN_WIDTH 640
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
41
42 #define FRAME_SIZE (8 << 20)  // 8 MB.
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10)  // 128 kB.
44
45 namespace {
46
// Destination stream written by dump_audio_block(). Nothing in this part of
// the file opens it, so it must be set up before that function is called.
FILE *audiofp;

// Thread object for the USB event loop.
// NOTE(review): presumably runs BMUSBCapture::usb_thread_func() -- confirm
// where the thread is started.
thread usb_thread;
// Polled by BMUSBCapture::usb_thread_func(); the event loop exits once this
// becomes true.
atomic<bool> should_quit;
51
// Returns the isochronous packet size (in bytes) to use for video of the
// given pixel width.
int find_xfer_size_for_width(int width)
{
        // Roughly six lines of video at two bytes per pixel seems to be what
        // the hardware wants per isochronous packet. (10-bit input would need
        // 4/3 of this; that scaling is not applied here.)
        const int six_lines = width * 2 * 6;

        // Packets must be a multiple of 1 kB, so round up to the next one;
        // values that are already a multiple are left alone.
        return (six_lines + 1023) & ~1023;
}
66
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
68 {
69         assert(width >= MIN_WIDTH);
70         size_t size = find_xfer_size_for_width(width);
71         int num_iso_pack = xfr->length / size;
72         if (num_iso_pack != xfr->num_iso_packets ||
73             size != xfr->iso_packet_desc[0].length) {
74                 xfr->num_iso_packets = num_iso_pack;
75                 libusb_set_iso_packet_lengths(xfr, size);
76         }
77 }
78
79 }  // namespace
80
81 FrameAllocator::~FrameAllocator() {}
82
83 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
84         : frame_size(frame_size)
85 {
86         for (size_t i = 0; i < num_queued_frames; ++i) {
87                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
88         }
89 }
90
91 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
92 {
93         Frame vf;
94         vf.owner = this;
95
96         unique_lock<mutex> lock(freelist_mutex);  // Meh.
97         if (freelist.empty()) {
98                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
99                         frame_size);
100         } else {
101                 vf.data = freelist.top().release();
102                 vf.size = frame_size;
103                 freelist.pop();  // Meh.
104         }
105         return vf;
106 }
107
108 void MallocFrameAllocator::release_frame(Frame frame)
109 {
110         if (frame.overflow > 0) {
111                 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
112         }
113         unique_lock<mutex> lock(freelist_mutex);
114         freelist.push(unique_ptr<uint8_t[]>(frame.data));
115 }
116
// Returns true if a comes strictly before b on a 16-bit counter that wraps
// around, i.e. if stepping forward from a reaches b in fewer than 0x8000
// steps. Equal values compare as "not less".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Forward distance from a to b, modulo 2^16.
        const uint16_t forward_distance = uint16_t(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
128
129 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
130 {
131         unique_lock<mutex> lock(queue_lock);
132         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
133                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
134                         q->back().timecode, timecode);
135                 frame.owner->release_frame(frame);
136                 return;
137         }
138
139         QueuedFrame qf;
140         qf.format = format;
141         qf.timecode = timecode;
142         qf.frame = frame;
143         q->push_back(move(qf));
144         queues_not_empty.notify_one();  // might be spurious
145 }
146
147 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
148 {
149         FILE *fp = fopen(filename, "wb");
150         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
151                 printf("short write!\n");
152         }
153         fclose(fp);
154 }
155
156 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
157 {
158         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
159 }
160
161 void BMUSBCapture::dequeue_thread_func()
162 {
163         if (has_dequeue_callbacks) {
164                 dequeue_init_callback();
165         }
166         while (!dequeue_thread_should_quit) {
167                 unique_lock<mutex> lock(queue_lock);
168                 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
169
170                 if (dequeue_thread_should_quit) break;
171
172                 uint16_t video_timecode = pending_video_frames.front().timecode;
173                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
174                 AudioFormat audio_format;
175                 audio_format.bits_per_sample = 24;
176                 audio_format.num_channels = 8;
177                 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
178                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
179                                 video_timecode);
180                         QueuedFrame video_frame = pending_video_frames.front();
181                         pending_video_frames.pop_front();
182                         lock.unlock();
183                         video_frame_allocator->release_frame(video_frame.frame);
184                 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
185                         printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
186                                 audio_timecode);
187                         QueuedFrame audio_frame = pending_audio_frames.front();
188                         pending_audio_frames.pop_front();
189                         lock.unlock();
190                         audio_format.id = audio_frame.format;
191
192                         // Use the video format of the pending frame.
193                         QueuedFrame video_frame = pending_video_frames.front();
194                         VideoFormat video_format;
195                         decode_video_format(video_frame.format, &video_format);
196
197                         frame_callback(audio_timecode,
198                                        FrameAllocator::Frame(), 0, video_format,
199                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
200                 } else {
201                         QueuedFrame video_frame = pending_video_frames.front();
202                         QueuedFrame audio_frame = pending_audio_frames.front();
203                         pending_audio_frames.pop_front();
204                         pending_video_frames.pop_front();
205                         lock.unlock();
206
207 #if 0
208                         char filename[255];
209                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
210                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
211                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
212 #endif
213
214                         VideoFormat video_format;
215                         audio_format.id = audio_frame.format;
216                         if (decode_video_format(video_frame.format, &video_format)) {
217                                 frame_callback(video_timecode,
218                                                video_frame.frame, HEADER_SIZE, video_format,
219                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
220                         } else {
221                                 frame_callback(video_timecode,
222                                                FrameAllocator::Frame(), 0, video_format,
223                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
224                         }
225                 }
226         }
227         if (has_dequeue_callbacks) {
228                 dequeue_cleanup_callback();
229         }
230 }
231
232 void BMUSBCapture::start_new_frame(const uint8_t *start)
233 {
234         uint16_t format = (start[3] << 8) | start[2];
235         uint16_t timecode = (start[1] << 8) | start[0];
236
237         if (current_video_frame.len > 0) {
238                 // If format is 0x0800 (no signal), add a fake (empty) audio
239                 // frame to get it out of the queue.
240                 // TODO: Figure out if there are other formats that come with
241                 // no audio, and treat them the same.
242                 if (format == 0x0800) {
243                         FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
244                         if (fake_audio_frame.data == nullptr) {
245                                 // Oh well, it's just a no-signal frame anyway.
246                                 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
247                                 current_video_frame.owner->release_frame(current_video_frame);
248                                 current_video_frame = video_frame_allocator->alloc_frame();
249                                 return;
250                         }
251                         queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
252                 }
253                 //dump_frame();
254                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
255
256                 // Update the assumed frame width. We might be one frame too late on format changes,
257                 // but it's much better than asking the user to choose manually.
258                 VideoFormat video_format;
259                 if (decode_video_format(format, &video_format)) {
260                         assumed_frame_width = video_format.width;
261                 }
262         }
263         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
264         //      format, timecode,
265         //      //start[7], start[6], start[5], start[4],
266         //      read_current_frame, FRAME_SIZE);
267
268         current_video_frame = video_frame_allocator->alloc_frame();
269         //if (current_video_frame.data == nullptr) {
270         //      read_current_frame = -1;
271         //} else {
272         //      read_current_frame = 0;
273         //}
274 }
275
276 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
277 {
278         uint16_t format = (start[3] << 8) | start[2];
279         uint16_t timecode = (start[1] << 8) | start[0];
280         if (current_audio_frame.len > 0) {
281                 //dump_audio_block();
282                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
283         }
284         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
285         //      format, timecode, read_current_audio_block);
286         current_audio_frame = audio_frame_allocator->alloc_frame();
287 }
288
289 #if 0
// Debugging aid (compiled out): hex-dumps the received bytes of one
// isochronous packet to stdout, 16 bytes per line with an extra gap after
// the eighth byte.
//
// xfr is the transfer owning the buffer, offset is the byte offset of this
// packet within xfr->buffer, and pack is the packet's descriptor (only
// actual_length is used).
static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
{
        //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
        for (unsigned j = 0; j < pack->actual_length; j++) {
        //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
                printf("%02x", xfr->buffer[j + offset]);
                // Separator: newline after 16 bytes, double space after 8, else single space.
                if ((j % 16) == 15)
                        printf("\n");
                else if ((j % 8) == 7)
                        printf("  ");
                else
                        printf(" ");
        }
}
304 #endif
305
// De-interleaves n bytes from src: bytes at even offsets go to dest1 and
// bytes at odd offsets go to dest2, each written contiguously.
// n must be even (asserted).
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        for (size_t pos = 0; pos < n; pos += 2) {
                const size_t pair = pos / 2;
                dest1[pair] = src[pos];
                dest2[pair] = src[pos + 1];
        }
}
317
318 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
319 {
320         if (current_frame->data == nullptr ||
321             current_frame->len > current_frame->size ||
322             start == end) {
323                 return;
324         }
325
326         int bytes = end - start;
327         if (current_frame->len + bytes > current_frame->size) {
328                 current_frame->overflow = current_frame->len + bytes - current_frame->size;
329                 current_frame->len = current_frame->size;
330                 if (current_frame->overflow > 1048576) {
331                         printf("%d bytes overflow after last %s frame\n",
332                                 int(current_frame->overflow), frame_type_name);
333                         current_frame->overflow = 0;
334                 }
335                 //dump_frame();
336         } else {
337                 if (current_frame->interleaved) {
338                         uint8_t *data = current_frame->data + current_frame->len / 2;
339                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
340                         if (current_frame->len % 2 == 1) {
341                                 ++data;
342                                 swap(data, data2);
343                         }
344                         if (bytes % 2 == 1) {
345                                 *data++ = *start++;
346                                 swap(data, data2);
347                                 ++current_frame->len;
348                                 --bytes;
349                         }
350                         memcpy_interleaved(data, data2, start, bytes);
351                         current_frame->len += bytes;
352                 } else {
353                         memcpy(current_frame->data + current_frame->len, start, bytes);
354                         current_frame->len += bytes;
355                 }
356         }
357 }
358
359 #ifdef __SSE4_1__
360
361 #if 0
// Debugging aid (compiled out): prints the 32 bytes of an AVX2 register as
// hex, prefixed by name, with an extra space between each group of eight.
// The extractions are written out one by one rather than in a loop.
// NOTE(review): presumably because _mm256_extract_epi8 needs a compile-time
// constant index -- confirm against the intrinsics documentation.
void avx2_dump(const char *name, __m256i n)
{
        printf("%-10s:", name);
        printf(" %02x", _mm256_extract_epi8(n, 0));
        printf(" %02x", _mm256_extract_epi8(n, 1));
        printf(" %02x", _mm256_extract_epi8(n, 2));
        printf(" %02x", _mm256_extract_epi8(n, 3));
        printf(" %02x", _mm256_extract_epi8(n, 4));
        printf(" %02x", _mm256_extract_epi8(n, 5));
        printf(" %02x", _mm256_extract_epi8(n, 6));
        printf(" %02x", _mm256_extract_epi8(n, 7));
        printf(" ");
        printf(" %02x", _mm256_extract_epi8(n, 8));
        printf(" %02x", _mm256_extract_epi8(n, 9));
        printf(" %02x", _mm256_extract_epi8(n, 10));
        printf(" %02x", _mm256_extract_epi8(n, 11));
        printf(" %02x", _mm256_extract_epi8(n, 12));
        printf(" %02x", _mm256_extract_epi8(n, 13));
        printf(" %02x", _mm256_extract_epi8(n, 14));
        printf(" %02x", _mm256_extract_epi8(n, 15));
        printf(" ");
        printf(" %02x", _mm256_extract_epi8(n, 16));
        printf(" %02x", _mm256_extract_epi8(n, 17));
        printf(" %02x", _mm256_extract_epi8(n, 18));
        printf(" %02x", _mm256_extract_epi8(n, 19));
        printf(" %02x", _mm256_extract_epi8(n, 20));
        printf(" %02x", _mm256_extract_epi8(n, 21));
        printf(" %02x", _mm256_extract_epi8(n, 22));
        printf(" %02x", _mm256_extract_epi8(n, 23));
        printf(" ");
        printf(" %02x", _mm256_extract_epi8(n, 24));
        printf(" %02x", _mm256_extract_epi8(n, 25));
        printf(" %02x", _mm256_extract_epi8(n, 26));
        printf(" %02x", _mm256_extract_epi8(n, 27));
        printf(" %02x", _mm256_extract_epi8(n, 28));
        printf(" %02x", _mm256_extract_epi8(n, 29));
        printf(" %02x", _mm256_extract_epi8(n, 30));
        printf(" %02x", _mm256_extract_epi8(n, 31));
        printf("\n");
}
402 #endif
403
// Does a memcpy and memchr in one to reduce processing time.
// Note that the benefit is somewhat limited if your L3 cache is small,
// as you'll (unfortunately) spend most of the time loading the data
// from main memory.
//
// Complicated cases are left to the slow path; it basically stops copying
// up until the first instance of "sync_char" (usually a bit before, actually).
// This is fine, since 0x00 bytes shouldn't really show up in normal picture
// data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame - frame being filled; its len is advanced by the number of
//                   bytes consumed.
//   start, limit  - input range [start, limit).
//   sync_char     - byte value at which copying must stop.
//
// Returns a pointer to the first input byte that was NOT consumed. The caller
// is expected to handle everything from there on itself (decode_packs() does
// so with memmem() and add_to_frame()). If the frame has no buffer, is already
// past its capacity, or the input is shorter than 128 bytes, nothing is
// consumed and start is returned unchanged.
const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
{
        if (current_frame->data == nullptr ||
            current_frame->len > current_frame->size ||
            start == limit) {
                return start;
        }
        size_t orig_bytes = limit - start;
        if (orig_bytes < 128) {
                // Don't bother.
                return start;
        }

        // Don't read more bytes than we can write.
        limit = min(limit, start + (current_frame->size - current_frame->len));

        // Align end to 32 bytes.
        limit = (const uint8_t *)(intptr_t(limit) & ~31);

        if (start >= limit) {
                return start;
        }

        // Process [0,31] bytes, such that start gets aligned to 32 bytes.
        // This prefix goes through the regular add_to_frame(); if it already
        // contains sync_char, we stop right there and let the caller take over.
        const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
        if (aligned_start != start) {
                const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
                if (sync_start == nullptr) {
                        add_to_frame(current_frame, "", start, aligned_start);
                } else {
                        add_to_frame(current_frame, "", start, sync_start);
                        return sync_start;
                }
        }

        // Make the length a multiple of 64.
        // (The interleaved loops below consume two vectors per iteration.)
        if (current_frame->interleaved) {
                if (((limit - aligned_start) % 64) != 0) {
                        limit -= 32;
                }
                assert(((limit - aligned_start) % 64) == 0);
        }

#if __AVX2__
        const __m256i needle = _mm256_set1_epi8(sync_char);

        const __restrict __m256i *in = (const __m256i *)aligned_start;
        if (current_frame->interleaved) {
                // out1 receives the next input byte and every second byte after it,
                // out2 the bytes in between; which of data/data2 that is depends on
                // whether an odd number of bytes has been written so far.
                __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
                __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                // Within each 128-bit lane: even-indexed bytes to the low half,
                // odd-indexed bytes to the high half.
                __m256i shuffle_cw = _mm256_set_epi8(
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
                while (in < (const __m256i *)limit) {
                        // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
                        __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
                        __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp

                        __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
                        __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
                        __m256i found = _mm256_or_si256(found1, found2);

                        data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
                        data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop

                        data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
                        data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop

                        __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
                        __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);

                        _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
                        _mm256_storeu_si256(out2, hi);

                        // sync_char somewhere in these 64 bytes: stop without advancing,
                        // so the block just stored does not count as consumed.
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
                while (in < (const __m256i *)limit) {
                        __m256i data = _mm256_load_si256(in);
                        _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
                        __m256i found = _mm256_cmpeq_epi8(data, needle);
                        // sync_char in this block: stop without counting it as consumed.
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#else
        // SSE4.1 version of the same two loops, working on 16-byte vectors.
        const __m128i needle = _mm_set1_epi8(sync_char);

        const __m128i *in = (const __m128i *)aligned_start;
        if (current_frame->interleaved) {
                // Same destination selection as in the AVX2 branch above.
                __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
                __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                // Split each 16-bit word into its low byte (first in memory) and
                // high byte, then pack the low bytes and high bytes separately.
                __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
                while (in < (const __m128i *)limit) {
                        __m128i data1 = _mm_load_si128(in);
                        __m128i data2 = _mm_load_si128(in + 1);
                        __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
                        __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
                        __m128i data1_hi = _mm_srli_epi16(data1, 8);
                        __m128i data2_hi = _mm_srli_epi16(data2, 8);
                        __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
                        _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
                        __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
                        _mm_storeu_si128(out2, hi);
                        __m128i found1 = _mm_cmpeq_epi8(data1, needle);
                        __m128i found2 = _mm_cmpeq_epi8(data2, needle);
                        // sync_char in either vector: stop without counting these 32 bytes.
                        if (!_mm_testz_si128(found1, found1) ||
                            !_mm_testz_si128(found2, found2)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
                while (in < (const __m128i *)limit) {
                        __m128i data = _mm_load_si128(in);
                        _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
                        __m128i found = _mm_cmpeq_epi8(data, needle);
                        // sync_char in this block: stop without counting it as consumed.
                        if (!_mm_testz_si128(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#endif

        //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);

        // First byte that was not consumed; the slow path continues from here.
        return (const uint8_t *)in;
}
571 #endif
572
573 void decode_packs(const libusb_transfer *xfr,
574                   const char *sync_pattern,
575                   int sync_length,
576                   FrameAllocator::Frame *current_frame,
577                   const char *frame_type_name,
578                   function<void(const uint8_t *start)> start_callback)
579 {
580         int offset = 0;
581         for (int i = 0; i < xfr->num_iso_packets; i++) {
582                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
583
584                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
585                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
586                         continue;
587 //exit(5);
588                 }
589
590                 const uint8_t *start = xfr->buffer + offset;
591                 const uint8_t *limit = start + pack->actual_length;
592                 while (start < limit) {  // Usually runs only one iteration.
593 #ifdef __SSE4_1__
594                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
595                         if (start == limit) break;
596                         assert(start < limit);
597 #endif
598
599                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
600                         if (start_next_frame == nullptr) {
601                                 // add the rest of the buffer
602                                 add_to_frame(current_frame, frame_type_name, start, limit);
603                                 break;
604                         } else {
605                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
606                                 start = start_next_frame + sync_length;  // skip sync
607                                 start_callback(start);
608                         }
609                 }
610 #if 0
611                 dump_pack(xfr, offset, pack);
612 #endif
613                 offset += pack->length;
614         }
615 }
616
617 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
618 {
619         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
620                 fprintf(stderr, "transfer status %d\n", xfr->status);
621                 libusb_free_transfer(xfr);
622                 exit(3);
623         }
624
625         assert(xfr->user_data != nullptr);
626         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
627
628         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
629                 if (xfr->endpoint == 0x84) {
630                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
631                 } else {
632                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
633
634                         // Update the transfer with the new assumed width, if we're in the process of changing formats.
635                         change_xfer_size_for_width(usb->assumed_frame_width, xfr);
636                 }
637         }
638         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
639                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
640                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
641 #if 0
642                 if (setup->wIndex == 44) {
643                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
644                 } else {
645                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
646                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
647                 }
648 #else
649                 memcpy(usb->register_file + usb->current_register, buf, 4);
650                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
651                 if (usb->current_register == 0) {
652                         // read through all of them
653                         printf("register dump:");
654                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
655                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
656                         }
657                         printf("\n");
658                 }
659                 libusb_fill_control_setup(xfr->buffer,
660                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
661                         /*index=*/usb->current_register, /*length=*/4);
662 #endif
663         }
664
665 #if 0
666         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
667         for (i = 0; i < xfr->actual_length; i++) {
668                 printf("%02x", xfr->buffer[i]);
669                 if (i % 16)
670                         printf("\n");
671                 else if (i % 8)
672                         printf("  ");
673                 else
674                         printf(" ");
675         }
676 #endif
677
678         int rc = libusb_submit_transfer(xfr);
679         if (rc < 0) {
680                 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
681                 exit(1);
682         }
683 }
684
685 void BMUSBCapture::usb_thread_func()
686 {
687         sched_param param;
688         memset(&param, 0, sizeof(param));
689         param.sched_priority = 1;
690         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
691                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
692         }
693         while (!should_quit) {
694                 int rc = libusb_handle_events(nullptr);
695                 if (rc != LIBUSB_SUCCESS)
696                         break;
697         }
698 }
699
700 struct USBCardDevice {
701         uint16_t product;
702         uint8_t bus, port;
703         libusb_device *device;
704 };
705
706 libusb_device_handle *open_card(int card_index, string *description)
707 {       
708         libusb_device **devices;
709         ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
710         if (num_devices == -1) {
711                 fprintf(stderr, "Error finding USB devices\n");
712                 exit(1);
713         }
714         vector<USBCardDevice> found_cards;
715         for (ssize_t i = 0; i < num_devices; ++i) {
716                 libusb_device_descriptor desc;
717                 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
718                         fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
719                         exit(1);
720                 }
721
722                 uint8_t bus = libusb_get_bus_number(devices[i]);
723                 uint8_t port = libusb_get_port_number(devices[i]);
724
725                 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
726                     !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
727                         libusb_unref_device(devices[i]);
728                         continue;
729                 }
730
731                 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
732         }
733         libusb_free_device_list(devices, 0);
734
735         // Sort the devices to get a consistent ordering.
736         sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
737                 if (a.product != b.product)
738                         return a.product < b.product;
739                 if (a.bus != b.bus)
740                         return a.bus < b.bus;
741                 return a.port < b.port;
742         });
743
744         for (size_t i = 0; i < found_cards.size(); ++i) {
745                 const char *product_name = nullptr;
746                 if (found_cards[i].product == 0xbd3b) {
747                         product_name = "Intensity Shuttle";
748                 } else if (found_cards[i].product == 0xbd4f) {
749                         product_name = "UltraStudio SDI";
750                 } else {
751                         assert(false);
752                 }
753
754                 char buf[256];
755                 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u  %s",
756                         int(i), found_cards[i].bus, found_cards[i].port, product_name);
757                 if (i == size_t(card_index)) {
758                         *description = buf;
759                 }
760                 fprintf(stderr, "%s\n", buf);
761         }
762
763         if (size_t(card_index) >= found_cards.size()) {
764                 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
765                 exit(1);
766         }
767
768         libusb_device_handle *devh;
769         int rc = libusb_open(found_cards[card_index].device, &devh);
770         if (rc < 0) {
771                 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
772                 exit(1);
773         }
774
775         for (size_t i = 0; i < found_cards.size(); ++i) {
776                 libusb_unref_device(found_cards[i].device);
777         }
778
779         return devh;
780 }
781
782 void BMUSBCapture::configure_card()
783 {
784         if (video_frame_allocator == nullptr) {
785                 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
786                 set_video_frame_allocator(owned_video_frame_allocator.get());
787         }
788         if (audio_frame_allocator == nullptr) {
789                 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
790                 set_audio_frame_allocator(owned_audio_frame_allocator.get());
791         }
792         dequeue_thread_should_quit = false;
793         dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
794
795         int rc;
796         struct libusb_transfer *xfr;
797
798         rc = libusb_init(nullptr);
799         if (rc < 0) {
800                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
801                 exit(1);
802         }
803
804         devh = open_card(card_index, &description);
805         if (!devh) {
806                 fprintf(stderr, "Error finding USB device\n");
807                 exit(1);
808         }
809
810         libusb_config_descriptor *config;
811         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
812         if (rc < 0) {
813                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
814                 exit(1);
815         }
816
817 #if 0
818         printf("%d interface\n", config->bNumInterfaces);
819         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
820                 printf("  interface %d\n", interface_number);
821                 const libusb_interface *interface = &config->interface[interface_number];
822                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
823                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
824                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
825                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
826                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
827                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
828                         }
829                 }
830         }
831 #endif
832
833         rc = libusb_set_configuration(devh, /*configuration=*/1);
834         if (rc < 0) {
835                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
836                 exit(1);
837         }
838
839         rc = libusb_claim_interface(devh, 0);
840         if (rc < 0) {
841                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
842                 exit(1);
843         }
844
845         // Alternate setting 1 is output, alternate setting 2 is input.
846         // Card is reset when switching alternates, so the driver uses
847         // this “double switch” when it wants to reset.
848         //
849         // There's also alternate settings 3 and 4, which seem to be
850         // like 1 and 2 except they advertise less bandwidth needed.
851         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
852         if (rc < 0) {
853                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
854                 exit(1);
855         }
856         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
857         if (rc < 0) {
858                 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
859                 exit(1);
860         }
861 #if 0
862         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
863         if (rc < 0) {
864                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
865                 exit(1);
866         }
867 #endif
868
869 #if 0
870         rc = libusb_claim_interface(devh, 3);
871         if (rc < 0) {
872                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
873                 exit(1);
874         }
875 #endif
876
877         // theories:
878         //   44 is some kind of timer register (first 16 bits count upwards)
879         //   24 is some sort of watchdog?
880         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
881         //      (or will go to 0x73c60010?), also seen 0x73c60100
882         //   12 also changes all the time, unclear why  
883         //   16 seems to be autodetected mode somehow
884         //      --    this is e00115e0 after reset?
885         //                    ed0115e0 after mode change [to output?]
886         //                    2d0015e0 after more mode change [to input]
887         //                    ed0115e0 after more mode change
888         //                    2d0015e0 after more mode change
889         //
890         //                    390115e0 seems to indicate we have signal
891         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
892         //
893         //                    200015e0 on startup
894         //         changes to 250115e0 when we sync to the signal
895         //
896         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
897         //
898         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
899         //
900         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
901         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
902         //
903         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
904         //    perhaps some of them are related to analog output?
905         //
906         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
907         //    but the driver sets it to 0x8036802a at some point.
908         //
909         //    all of this is on request 214/215. other requests (192, 219,
910         //    222, 223, 224) are used for firmware upgrade. Probably best to
911         //    stay out of it unless you know what you're doing.
912         //
913         //
914         // register 16:
915         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
916         //
917         // theories:
918         //   0x01 - stable signal
919         //   0x04 - deep color
920         //   0x08 - unknown (audio??)
921         //   0x20 - 720p??
922         //   0x30 - 576p??
923
924         update_capture_mode();
925
926         struct ctrl {
927                 int endpoint;
928                 int request;
929                 int index;
930                 uint32_t data;
931         };
932         static const ctrl ctrls[] = {
933                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
934                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
935
936                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
937                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
938                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
939                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
940         };
941
942         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
943                 uint32_t flipped = htonl(ctrls[req].data);
944                 static uint8_t value[4];
945                 memcpy(value, &flipped, sizeof(flipped));
946                 int size = sizeof(value);
947                 //if (ctrls[req].request == 215) size = 0;
948                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
949                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
950                 if (rc < 0) {
951                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
952                         exit(1);
953                 }
954
955                 if (ctrls[req].index == 16 && rc == 4) {
956                         printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
957                 }
958
959 #if 0
960                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
961                 for (int i = 0; i < rc; ++i) {
962                         printf("%02x", value[i]);
963                 }
964                 printf("\n");
965 #endif
966         }
967
968 #if 0
969         // DEBUG
970         for ( ;; ) {
971                 static int my_index = 0;
972                 static uint8_t value[4];
973                 int size = sizeof(value);
974                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
975                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
976                 if (rc < 0) {
977                         fprintf(stderr, "Error on control\n");
978                         exit(1);
979                 }
980                 printf("rc=%d index=%d: 0x", rc, my_index);
981                 for (int i = 0; i < rc; ++i) {
982                         printf("%02x", value[i]);
983                 }
984                 printf("\n");
985         }
986 #endif
987
988 #if 0
989         // set up an asynchronous transfer of the timer register
990         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
991         static int completed = 0;
992
993         xfr = libusb_alloc_transfer(0);
994         libusb_fill_control_setup(cmdbuf,
995             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
996                 /*index=*/44, /*length=*/4);
997         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
998         xfr->user_data = this;
999         libusb_submit_transfer(xfr);
1000
1001         // set up an asynchronous transfer of register 24
1002         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1003         static int completed2 = 0;
1004
1005         xfr = libusb_alloc_transfer(0);
1006         libusb_fill_control_setup(cmdbuf2,
1007             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1008                 /*index=*/24, /*length=*/4);
1009         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1010         xfr->user_data = this;
1011         libusb_submit_transfer(xfr);
1012 #endif
1013
1014         // set up an asynchronous transfer of the register dump
1015         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1016         static int completed3 = 0;
1017
1018         xfr = libusb_alloc_transfer(0);
1019         libusb_fill_control_setup(cmdbuf3,
1020             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1021                 /*index=*/current_register, /*length=*/4);
1022         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1023         xfr->user_data = this;
1024         //libusb_submit_transfer(xfr);
1025
1026         //audiofp = fopen("audio.raw", "wb");
1027
1028         // set up isochronous transfers for audio and video
1029         for (int e = 3; e <= 4; ++e) {
1030                 //int num_transfers = (e == 3) ? 6 : 6;
1031                 int num_transfers = 10;
1032                 for (int i = 0; i < num_transfers; ++i) {
1033                         size_t buf_size;
1034                         int num_iso_pack, size;
1035                         if (e == 3) {
1036                                 // Allocate for minimum width (because that will give us the most
1037                                 // number of packets, so we don't need to reallocated, but we'll
1038                                 // default to 720p for the first frame.
1039                                 size = find_xfer_size_for_width(MIN_WIDTH);
1040                                 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1041                                 buf_size = USB_VIDEO_TRANSFER_SIZE;
1042                         } else {
1043                                 size = 0xc0;
1044                                 num_iso_pack = 80;
1045                                 buf_size = num_iso_pack * size;
1046                         }
1047                         assert(size_t(num_iso_pack * size) <= buf_size);
1048                         uint8_t *buf = new uint8_t[buf_size];
1049
1050                         xfr = libusb_alloc_transfer(num_iso_pack);
1051                         if (!xfr) {
1052                                 fprintf(stderr, "oom\n");
1053                                 exit(1);
1054                         }
1055
1056                         int ep = LIBUSB_ENDPOINT_IN | e;
1057                         libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1058                                 num_iso_pack, cb_xfr, nullptr, 0);
1059                         libusb_set_iso_packet_lengths(xfr, size);
1060                         xfr->user_data = this;
1061
1062                         if (e == 3) {
1063                                 change_xfer_size_for_width(assumed_frame_width, xfr);
1064                         }
1065
1066                         iso_xfrs.push_back(xfr);
1067                 }
1068         }
1069 }
1070
1071 void BMUSBCapture::start_bm_capture()
1072 {
1073         int i = 0;
1074         for (libusb_transfer *xfr : iso_xfrs) {
1075                 int rc = libusb_submit_transfer(xfr);
1076                 ++i;
1077                 if (rc < 0) {
1078                         //printf("num_bytes=%d\n", num_bytes);
1079                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1080                                 xfr->endpoint, i, libusb_error_name(rc));
1081                         exit(1);
1082                 }
1083         }
1084
1085
1086 #if 0
1087         libusb_release_interface(devh, 0);
1088 out:
1089         if (devh)
1090                 libusb_close(devh);
1091         libusb_exit(nullptr);
1092         return rc;
1093 #endif
1094 }
1095
1096 void BMUSBCapture::stop_dequeue_thread()
1097 {
1098         dequeue_thread_should_quit = true;
1099         queues_not_empty.notify_all();
1100         dequeue_thread.join();
1101 }
1102
1103 void BMUSBCapture::start_bm_thread()
1104 {
1105         should_quit = false;
1106         usb_thread = thread(&BMUSBCapture::usb_thread_func);
1107 }
1108
1109 void BMUSBCapture::stop_bm_thread()
1110 {
1111         should_quit = true;
1112         usb_thread.join();
1113 }
1114
// One row of the lookup table in decode_video_format(). The member order is
// significant, since the table rows are written as positional aggregate
// initializers.
struct VideoFormatEntry {
	uint16_t normalized_video_format;  // Format word after masking, used as lookup key.
	unsigned width;
	unsigned height;
	unsigned second_field_start;
	unsigned extra_lines_top;
	unsigned extra_lines_bottom;
	unsigned frame_rate_nom;           // Frame rate is frame_rate_nom / frame_rate_den.
	unsigned frame_rate_den;
	bool interlaced;
};
1122
1123 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1124 {
1125         decoded_video_format->id = video_format;
1126         decoded_video_format->interlaced = false;
1127
1128         // TODO: Add these for all formats as we find them.
1129         decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1130
1131         if (video_format == 0x0800) {
1132                 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1133                 // It's a strange thing, but what can you do.
1134                 decoded_video_format->width = 720;
1135                 decoded_video_format->height = 525;
1136                 decoded_video_format->extra_lines_top = 0;
1137                 decoded_video_format->extra_lines_bottom = 0;
1138                 decoded_video_format->frame_rate_nom = 3013;
1139                 decoded_video_format->frame_rate_den = 100;
1140                 decoded_video_format->has_signal = false;
1141                 return true;
1142         }
1143         if ((video_format & 0xe800) != 0xe800) {
1144                 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1145                         video_format);
1146                 decoded_video_format->width = 0;
1147                 decoded_video_format->height = 0;
1148                 decoded_video_format->extra_lines_top = 0;
1149                 decoded_video_format->extra_lines_bottom = 0;
1150                 decoded_video_format->frame_rate_nom = 60;
1151                 decoded_video_format->frame_rate_den = 1;
1152                 decoded_video_format->has_signal = false;
1153                 return false;
1154         }
1155
1156         decoded_video_format->has_signal = true;
1157
1158         // NTSC (480i59.94, I suppose). A special case, see below.
1159         if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1160                 decoded_video_format->width = 720;
1161                 decoded_video_format->height = 480;
1162                 decoded_video_format->extra_lines_top = 17;
1163                 decoded_video_format->extra_lines_bottom = 28;
1164                 decoded_video_format->frame_rate_nom = 30000;
1165                 decoded_video_format->frame_rate_den = 1001;
1166                 decoded_video_format->second_field_start = 280;
1167                 decoded_video_format->interlaced = true;
1168                 return true;
1169         }
1170
1171         // PAL (576i50, I suppose). A special case, see below.
1172         if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1173                 decoded_video_format->width = 720;
1174                 decoded_video_format->height = 576;
1175                 decoded_video_format->extra_lines_top = 22;
1176                 decoded_video_format->extra_lines_bottom = 27;
1177                 decoded_video_format->frame_rate_nom = 25;
1178                 decoded_video_format->frame_rate_den = 1;
1179                 decoded_video_format->second_field_start = 335;
1180                 decoded_video_format->interlaced = true;
1181                 return true;
1182         }
1183
1184         // 0x8 seems to be a flag about availability of deep color on the input,
1185         // except when it's not (e.g. it's the only difference between NTSC
1186         // and PAL). Rather confusing. But we clear it here nevertheless, because
1187         // usually it doesn't mean anything.
1188         //
1189         // 0x4 is a flag I've only seen from the D4. I don't know what it is.
1190         uint16_t normalized_video_format = video_format & ~0xe80c;
1191         constexpr VideoFormatEntry entries[] = {
1192                 { 0x01f1,  720,  480,   0, 40,  5, 60000, 1001, false },  // 480p59.94 (believed).
1193                 { 0x0131,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50.
1194                 { 0x0011,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50 (5:4).
1195                 { 0x0143, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
1196                 { 0x0103, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1197                 { 0x0125, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1198                 { 0x0121, 1280,  720,   0, 25,  5, 60000, 1001, false },  // 720p59.94.
1199                 { 0x01c3, 1920, 1080,   0,  0,  0,    30,    1, false },  // 1080p30.
1200                 { 0x0003, 1920, 1080, 583, 20, 25,    30,    1,  true },  // 1080i60.
1201                 { 0x01e1, 1920, 1080,   0,  0,  0, 30000, 1001, false },  // 1080p29.97.
1202                 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001,  true },  // 1080i59.94.
1203                 { 0x0063, 1920, 1080,   0,  0,  0,    25,    1, false },  // 1080p25.
1204                 { 0x0043, 1920, 1080,   0,  0,  0,    25,    1,  true },  // 1080p50.
1205                 { 0x008e, 1920, 1080,   0,  0,  0,    24,    1, false },  // 1080p24.
1206                 { 0x00a1, 1920, 1080,   0,  0,  0, 24000, 1001, false },  // 1080p23.98.
1207         };
1208         for (const VideoFormatEntry &entry : entries) {
1209                 if (normalized_video_format == entry.normalized_video_format) {
1210                         decoded_video_format->width = entry.width;
1211                         decoded_video_format->height = entry.height;
1212                         decoded_video_format->second_field_start = entry.second_field_start;
1213                         decoded_video_format->extra_lines_top = entry.extra_lines_top;
1214                         decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1215                         decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1216                         decoded_video_format->frame_rate_den = entry.frame_rate_den;
1217                         decoded_video_format->interlaced = entry.interlaced;
1218                         return true;
1219                 }
1220         }
1221
1222         printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1223         decoded_video_format->width = 1280;
1224         decoded_video_format->height = 720;
1225         decoded_video_format->frame_rate_nom = 60;
1226         decoded_video_format->frame_rate_den = 1;
1227         return false;
1228 }
1229
1230 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1231 {
1232         // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1233         VideoMode auto_mode;
1234         auto_mode.name = "Autodetect";
1235         auto_mode.autodetect = true;
1236         return {{ 0, auto_mode }};
1237 }
1238
1239 uint32_t BMUSBCapture::get_current_video_mode() const
1240 {
1241         return 0;  // Matches get_available_video_modes().
1242 }
1243
1244 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1245 {
1246         assert(video_mode_id == 0);  // Matches get_available_video_modes().
1247 }
1248
1249 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1250 {
1251         return {
1252                 { 0x00000000, "HDMI/SDI" },
1253                 { 0x02000000, "Component" },
1254                 { 0x04000000, "Composite" },
1255                 { 0x06000000, "S-video" }
1256         };
1257 }
1258
1259 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1260 {
1261         assert((video_input_id & ~0x06000000) == 0);
1262         current_video_input = video_input_id;
1263         update_capture_mode();
1264 }
1265
1266 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1267 {
1268         return {
1269                 { 0x00000000, "Embedded" },
1270                 { 0x10000000, "Analog" }
1271         };
1272 }
1273
1274 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1275 {
1276         assert((audio_input_id & ~0x10000000) == 0);
1277         current_audio_input = audio_input_id;
1278         update_capture_mode();
1279 }
1280
1281 void BMUSBCapture::update_capture_mode()
1282 {
1283         // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
1284         // clearing the 0x08000000 bit seems to change the capture format (other source?)
1285         uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
1286
1287         int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1288                 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1289         if (rc < 0) {
1290                 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1291                 exit(1);
1292         }
1293 }