]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Support card hotplugging (both add and remove).
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <unistd.h>
11 #include <netinet/in.h>
12 #include <sched.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #ifdef __SSE4_1__
18 #include <immintrin.h>
19 #endif
20 #include "bmusb.h"
21
22 #include <algorithm>
23 #include <atomic>
24 #include <condition_variable>
25 #include <cstddef>
26 #include <cstdint>
27 #include <deque>
28 #include <functional>
29 #include <memory>
30 #include <mutex>
31 #include <stack>
32 #include <string>
33 #include <thread>
34
35 using namespace std;
36 using namespace std::placeholders;
37
38 #define USB_VENDOR_BLACKMAGIC 0x1edb
39 #define MIN_WIDTH 640
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
42 #define AUDIO_HEADER_SIZE 4
43
44 #define FRAME_SIZE (8 << 20)  // 8 MB.
45 #define USB_VIDEO_TRANSFER_SIZE (128 << 10)  // 128 kB.
46
47 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
48
49 namespace {
50
// Output stream that dump_audio_block() writes raw audio payloads to.
// NOTE(review): nothing in the visible part of this file opens it -- confirm
// where it gets set before enabling any of the dump code.
FILE *audiofp;

// NOTE(review): presumably the thread that runs usb_thread_func() -- confirm
// against the code that starts it.
thread usb_thread;
// Checked by the event loop in usb_thread_func(); the loop runs while this is false.
atomic<bool> should_quit;
55
// Returns the isochronous packet size (in bytes) to use for a video signal
// of the given width.
//
// The hardware seems to want packets that scale with the line width; about
// six lines' worth of 8-bit data (two bytes per pixel) appears to be right.
// For 10-bit input the raw size would have to grow by a factor of 4/3.
int find_xfer_size_for_width(int width)
{
        const int six_lines = width * 2 * 6;

        // Packet sizes must be a multiple of 1 kB, so round up to the next one
        // (values that are already a multiple are left untouched).
        return (six_lines + 1023) & ~1023;
}
70
71 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
72 {
73         assert(width >= MIN_WIDTH);
74         size_t size = find_xfer_size_for_width(width);
75         int num_iso_pack = xfr->length / size;
76         if (num_iso_pack != xfr->num_iso_packets ||
77             size != xfr->iso_packet_desc[0].length) {
78                 xfr->num_iso_packets = num_iso_pack;
79                 libusb_set_iso_packet_lengths(xfr, size);
80         }
81 }
82
83 }  // namespace
84
// Out-of-line (empty) destructor for the allocator base class.
FrameAllocator::~FrameAllocator() {}
86
87 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
88         : frame_size(frame_size)
89 {
90         for (size_t i = 0; i < num_queued_frames; ++i) {
91                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
92         }
93 }
94
95 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
96 {
97         Frame vf;
98         vf.owner = this;
99
100         unique_lock<mutex> lock(freelist_mutex);  // Meh.
101         if (freelist.empty()) {
102                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
103                         frame_size);
104         } else {
105                 vf.data = freelist.top().release();
106                 vf.size = frame_size;
107                 freelist.pop();  // Meh.
108         }
109         return vf;
110 }
111
112 void MallocFrameAllocator::release_frame(Frame frame)
113 {
114         if (frame.overflow > 0) {
115                 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
116         }
117         unique_lock<mutex> lock(freelist_mutex);
118         freelist.push(unique_ptr<uint8_t[]>(frame.data));
119 }
120
// Compares two 16-bit sequence numbers that are allowed to wrap around.
//
// Returns true if b lies strictly ahead of a by fewer than 0x8000 steps
// (counting forward modulo 0x10000); equal values are never "less than".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Number of forward steps needed to get from a to b, modulo 2^16.
        const uint16_t forward_steps = uint16_t(b - a);
        return forward_steps != 0 && forward_steps < 0x8000;
}
132
133 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
134 {
135         unique_lock<mutex> lock(queue_lock);
136         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
137                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
138                         q->back().timecode, timecode);
139                 frame.owner->release_frame(frame);
140                 return;
141         }
142
143         QueuedFrame qf;
144         qf.format = format;
145         qf.timecode = timecode;
146         qf.frame = frame;
147         q->push_back(move(qf));
148         queues_not_empty.notify_one();  // might be spurious
149 }
150
151 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
152 {
153         FILE *fp = fopen(filename, "wb");
154         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
155                 printf("short write!\n");
156         }
157         fclose(fp);
158 }
159
160 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
161 {
162         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
163 }
164
165 void BMUSBCapture::dequeue_thread_func()
166 {
167         if (has_dequeue_callbacks) {
168                 dequeue_init_callback();
169         }
170         while (!dequeue_thread_should_quit) {
171                 unique_lock<mutex> lock(queue_lock);
172                 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
173
174                 if (dequeue_thread_should_quit) break;
175
176                 uint16_t video_timecode = pending_video_frames.front().timecode;
177                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
178                 AudioFormat audio_format;
179                 audio_format.bits_per_sample = 24;
180                 audio_format.num_channels = 8;
181                 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
182                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
183                                 video_timecode);
184                         QueuedFrame video_frame = pending_video_frames.front();
185                         pending_video_frames.pop_front();
186                         lock.unlock();
187                         video_frame_allocator->release_frame(video_frame.frame);
188                 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
189                         printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
190                                 audio_timecode);
191                         QueuedFrame audio_frame = pending_audio_frames.front();
192                         pending_audio_frames.pop_front();
193                         lock.unlock();
194                         audio_format.id = audio_frame.format;
195
196                         // Use the video format of the pending frame.
197                         QueuedFrame video_frame = pending_video_frames.front();
198                         VideoFormat video_format;
199                         decode_video_format(video_frame.format, &video_format);
200
201                         frame_callback(audio_timecode,
202                                        FrameAllocator::Frame(), 0, video_format,
203                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
204                 } else {
205                         QueuedFrame video_frame = pending_video_frames.front();
206                         QueuedFrame audio_frame = pending_audio_frames.front();
207                         pending_audio_frames.pop_front();
208                         pending_video_frames.pop_front();
209                         lock.unlock();
210
211 #if 0
212                         char filename[255];
213                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
214                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
215                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
216 #endif
217
218                         VideoFormat video_format;
219                         audio_format.id = audio_frame.format;
220                         if (decode_video_format(video_frame.format, &video_format)) {
221                                 frame_callback(video_timecode,
222                                                video_frame.frame, HEADER_SIZE, video_format,
223                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
224                         } else {
225                                 frame_callback(video_timecode,
226                                                FrameAllocator::Frame(), 0, video_format,
227                                                audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
228                         }
229                 }
230         }
231         if (has_dequeue_callbacks) {
232                 dequeue_cleanup_callback();
233         }
234 }
235
236 void BMUSBCapture::start_new_frame(const uint8_t *start)
237 {
238         uint16_t format = (start[3] << 8) | start[2];
239         uint16_t timecode = (start[1] << 8) | start[0];
240
241         if (current_video_frame.len > 0) {
242                 // If format is 0x0800 (no signal), add a fake (empty) audio
243                 // frame to get it out of the queue.
244                 // TODO: Figure out if there are other formats that come with
245                 // no audio, and treat them the same.
246                 if (format == 0x0800) {
247                         FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
248                         if (fake_audio_frame.data == nullptr) {
249                                 // Oh well, it's just a no-signal frame anyway.
250                                 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
251                                 current_video_frame.owner->release_frame(current_video_frame);
252                                 current_video_frame = video_frame_allocator->alloc_frame();
253                                 return;
254                         }
255                         queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
256                 }
257                 //dump_frame();
258                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
259
260                 // Update the assumed frame width. We might be one frame too late on format changes,
261                 // but it's much better than asking the user to choose manually.
262                 VideoFormat video_format;
263                 if (decode_video_format(format, &video_format)) {
264                         assumed_frame_width = video_format.width;
265                 }
266         }
267         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
268         //      format, timecode,
269         //      //start[7], start[6], start[5], start[4],
270         //      read_current_frame, FRAME_SIZE);
271
272         current_video_frame = video_frame_allocator->alloc_frame();
273         //if (current_video_frame.data == nullptr) {
274         //      read_current_frame = -1;
275         //} else {
276         //      read_current_frame = 0;
277         //}
278 }
279
280 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
281 {
282         uint16_t format = (start[3] << 8) | start[2];
283         uint16_t timecode = (start[1] << 8) | start[0];
284         if (current_audio_frame.len > 0) {
285                 //dump_audio_block();
286                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
287         }
288         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
289         //      format, timecode, read_current_audio_block);
290         current_audio_frame = audio_frame_allocator->alloc_frame();
291 }
292
293 #if 0
294 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
295 {
296         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
297         for (unsigned j = 0; j < pack->actual_length; j++) {
298         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
299                 printf("%02x", xfr->buffer[j + offset]);
300                 if ((j % 16) == 15)
301                         printf("\n");
302                 else if ((j % 8) == 7)
303                         printf("  ");
304                 else
305                         printf(" ");
306         }
307 }
308 #endif
309
// De-interleaves n bytes from src: bytes at even offsets go to dest1, bytes
// at odd offsets go to dest2 (n/2 bytes each). n must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        const size_t num_pairs = n / 2;
        for (size_t pair = 0; pair < num_pairs; ++pair) {
                dest1[pair] = src[2 * pair];
                dest2[pair] = src[2 * pair + 1];
        }
}
321
322 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
323 {
324         if (current_frame->data == nullptr ||
325             current_frame->len > current_frame->size ||
326             start == end) {
327                 return;
328         }
329
330         int bytes = end - start;
331         if (current_frame->len + bytes > current_frame->size) {
332                 current_frame->overflow = current_frame->len + bytes - current_frame->size;
333                 current_frame->len = current_frame->size;
334                 if (current_frame->overflow > 1048576) {
335                         printf("%d bytes overflow after last %s frame\n",
336                                 int(current_frame->overflow), frame_type_name);
337                         current_frame->overflow = 0;
338                 }
339                 //dump_frame();
340         } else {
341                 if (current_frame->interleaved) {
342                         uint8_t *data = current_frame->data + current_frame->len / 2;
343                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
344                         if (current_frame->len % 2 == 1) {
345                                 ++data;
346                                 swap(data, data2);
347                         }
348                         if (bytes % 2 == 1) {
349                                 *data++ = *start++;
350                                 swap(data, data2);
351                                 ++current_frame->len;
352                                 --bytes;
353                         }
354                         memcpy_interleaved(data, data2, start, bytes);
355                         current_frame->len += bytes;
356                 } else {
357                         memcpy(current_frame->data + current_frame->len, start, bytes);
358                         current_frame->len += bytes;
359                 }
360         }
361 }
362
363 #ifdef __SSE4_1__
364
365 #if 0
// Debugging aid: prints the 32 bytes of an AVX2 register in hex, lowest
// byte first, in four groups of eight, prefixed by the given name.
void avx2_dump(const char *name, __m256i n)
{
        uint8_t bytes[32];
        _mm256_storeu_si256((__m256i *)bytes, n);

        printf("%-10s:", name);
        for (int i = 0; i < 32; ++i) {
                if (i > 0 && i % 8 == 0) {
                        printf(" ");  // Extra gap between groups of eight.
                }
                printf(" %02x", bytes[i]);
        }
        printf("\n");
}
406 #endif
407
408 // Does a memcpy and memchr in one to reduce processing time.
409 // Note that the benefit is somewhat limited if your L3 cache is small,
410 // as you'll (unfortunately) spend most of the time loading the data
411 // from main memory.
412 //
413 // Complicated cases are left to the slow path; it basically stops copying
414 // up until the first instance of "sync_char" (usually a bit before, actually).
415 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
416 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// Parameters:
//   current_frame: frame to append to (both planes, if it is interleaved).
//   start, limit:  input range [start, limit).
//   sync_char:     byte value that stops the copy.
//
// Returns a pointer to the first input byte that was NOT added to the frame;
// the caller is expected to handle everything from there on. If nothing was
// copied at all, the return value is simply "start".
const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
{
        // Same early-outs as add_to_frame(): no buffer, frame already past its
        // capacity, or empty input.
        if (current_frame->data == nullptr ||
            current_frame->len > current_frame->size ||
            start == limit) {
                return start;
        }
        size_t orig_bytes = limit - start;
        if (orig_bytes < 128) {
                // Don't bother.
                return start;
        }

        // Don't read more bytes than we can write.
        limit = min(limit, start + (current_frame->size - current_frame->len));

        // Align end to 32 bytes.
        limit = (const uint8_t *)(intptr_t(limit) & ~31);

        if (start >= limit) {
                return start;
        }

        // Process [0,31] bytes, such that start gets aligned to 32 bytes.
        const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
        if (aligned_start != start) {
                const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
                if (sync_start == nullptr) {
                        add_to_frame(current_frame, "", start, aligned_start);
                } else {
                        // sync_char shows up before we even get aligned; copy what
                        // precedes it and leave the rest to the caller.
                        add_to_frame(current_frame, "", start, sync_start);
                        return sync_start;
                }
        }

        // Make the length a multiple of 64.
        // (Both interleaved loops below read two vectors per iteration and
        // advance "in" by two; the AVX2 one consumes 64 input bytes at a time.)
        if (current_frame->interleaved) {
                if (((limit - aligned_start) % 64) != 0) {
                        limit -= 32;
                }
                assert(((limit - aligned_start) % 64) == 0);
        }

#if __AVX2__
        const __m256i needle = _mm256_set1_epi8(sync_char);

        const __restrict __m256i *in = (const __m256i *)aligned_start;
        if (current_frame->interleaved) {
                // out1 receives the bytes at even input offsets, out2 the ones at
                // odd offsets. With an odd length so far, "data" is one byte ahead
                // of "data2" and the two roles are exchanged.
                __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
                __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                // Within each 128-bit lane: even-offset bytes to the low half,
                // odd-offset bytes to the high half.
                __m256i shuffle_cw = _mm256_set_epi8(
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
                        15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
                while (in < (const __m256i *)limit) {
                        // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
                        __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
                        __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp

                        __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
                        __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
                        __m256i found = _mm256_or_si256(found1, found2);

                        data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
                        data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop

                        data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
                        data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop

                        __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
                        __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);

                        _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
                        _mm256_storeu_si256(out2, hi);

                        // sync_char somewhere in these 64 bytes: stop without
                        // advancing, so this chunk is not counted in len and the
                        // returned pointer refers to its beginning.
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
                while (in < (const __m256i *)limit) {
                        __m256i data = _mm256_load_si256(in);
                        _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
                        __m256i found = _mm256_cmpeq_epi8(data, needle);
                        // As above: a chunk containing sync_char is stored but not counted.
                        if (!_mm256_testz_si256(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#else
        // Same algorithm with 128-bit vectors.
        const __m128i needle = _mm_set1_epi8(sync_char);

        const __m128i *in = (const __m128i *)aligned_start;
        if (current_frame->interleaved) {
                // See the AVX2 branch for how out1/out2 are chosen.
                __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
                __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
                if (current_frame->len % 2 == 1) {
                        swap(out1, out2);
                }

                __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
                while (in < (const __m128i *)limit) {
                        __m128i data1 = _mm_load_si128(in);
                        __m128i data2 = _mm_load_si128(in + 1);
                        // Split every 16-bit word into its low byte (even input offset)
                        // and high byte (odd input offset), then pack each set together.
                        __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
                        __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
                        __m128i data1_hi = _mm_srli_epi16(data1, 8);
                        __m128i data2_hi = _mm_srli_epi16(data2, 8);
                        __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
                        _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
                        __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
                        _mm_storeu_si128(out2, hi);
                        __m128i found1 = _mm_cmpeq_epi8(data1, needle);
                        __m128i found2 = _mm_cmpeq_epi8(data2, needle);
                        // Stop (without counting this chunk) if sync_char was seen.
                        if (!_mm_testz_si128(found1, found1) ||
                            !_mm_testz_si128(found2, found2)) {
                                break;
                        }

                        in += 2;
                        ++out1;
                        ++out2;
                }
                current_frame->len += (uint8_t *)in - aligned_start;
        } else {
                __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
                while (in < (const __m128i *)limit) {
                        __m128i data = _mm_load_si128(in);
                        _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
                        __m128i found = _mm_cmpeq_epi8(data, needle);
                        // Stop (without counting this chunk) if sync_char was seen.
                        if (!_mm_testz_si128(found, found)) {
                                break;
                        }

                        ++in;
                        ++out;
                }
                current_frame->len = (uint8_t *)out - current_frame->data;
        }
#endif

        //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);

        return (const uint8_t *)in;
}
575 #endif
576
577 void decode_packs(const libusb_transfer *xfr,
578                   const char *sync_pattern,
579                   int sync_length,
580                   FrameAllocator::Frame *current_frame,
581                   const char *frame_type_name,
582                   function<void(const uint8_t *start)> start_callback)
583 {
584         int offset = 0;
585         for (int i = 0; i < xfr->num_iso_packets; i++) {
586                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
587
588                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
589                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
590                         continue;
591 //exit(5);
592                 }
593
594                 const uint8_t *start = xfr->buffer + offset;
595                 const uint8_t *limit = start + pack->actual_length;
596                 while (start < limit) {  // Usually runs only one iteration.
597 #ifdef __SSE4_1__
598                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
599                         if (start == limit) break;
600                         assert(start < limit);
601 #endif
602
603                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
604                         if (start_next_frame == nullptr) {
605                                 // add the rest of the buffer
606                                 add_to_frame(current_frame, frame_type_name, start, limit);
607                                 break;
608                         } else {
609                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
610                                 start = start_next_frame + sync_length;  // skip sync
611                                 start_callback(start);
612                         }
613                 }
614 #if 0
615                 dump_pack(xfr, offset, pack);
616 #endif
617                 offset += pack->length;
618         }
619 }
620
// Completion handler for a libusb transfer; xfr->user_data must point to the
// BMUSBCapture instance the transfer belongs to.
//
// Isochronous transfers are decoded into audio or video frames, control
// transfers are treated as register reads. Afterwards the same transfer is
// submitted again, unless the device has gone away. Any other transfer
// failure terminates the process.
void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
{
        // Anything except "completed" and "device gone" is treated as fatal.
        if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
            xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
                fprintf(stderr, "error: transfer status %d\n", xfr->status);
                libusb_free_transfer(xfr);
                exit(3);
        }

        assert(xfr->user_data != nullptr);
        BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);

        if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
                // Report the disconnect only once, no matter how many transfers
                // come back with this status.
                if (!usb->disconnected) {
                        fprintf(stderr, "Device went away, stopping transfers.\n");
                        usb->disconnected = true;
                        if (usb->card_disconnected_callback) {
                                usb->card_disconnected_callback();
                        }
                }
                // Don't reschedule the transfer; the loop will stop by itself.
                return;
        }

        if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
                // Endpoint 0x84 is handled as audio, every other endpoint as video.
                if (xfr->endpoint == 0x84) {
                        decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
                } else {
                        decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));

                        // Update the transfer with the new assumed width, if we're in the process of changing formats.
                        change_xfer_size_for_width(usb->assumed_frame_width, xfr);
                }
        }
        if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
                //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
                uint8_t *buf = libusb_control_transfer_get_data(xfr);
#if 0
                if (setup->wIndex == 44) {
                        printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
                } else {
                        printf("read register %2d:                      0x%02x%02x%02x%02x\n",
                                setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
                }
#else
                // Store the four bytes just read and move on to the next register,
                // wrapping around after NUM_BMUSB_REGISTERS bytes.
                memcpy(usb->register_file + usb->current_register, buf, 4);
                usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
                if (usb->current_register == 0) {
                        // read through all of them
                        printf("register dump:");
                        for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
                                printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
                        }
                        printf("\n");
                }
                // Rewrite the setup packet so that the resubmitted transfer reads
                // the next register.
                libusb_fill_control_setup(xfr->buffer,
                    LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
                        /*index=*/usb->current_register, /*length=*/4);
#endif
        }

#if 0
        printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
        for (i = 0; i < xfr->actual_length; i++) {
                printf("%02x", xfr->buffer[i]);
                if (i % 16)
                        printf("\n");
                else if (i % 8)
                        printf("  ");
                else
                        printf(" ");
        }
#endif

        // Queue the same transfer again so that capturing continues.
        int rc = libusb_submit_transfer(xfr);
        if (rc < 0) {
                fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
                exit(1);
        }
}
701
702 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
703 {
704         if (card_connected_callback != nullptr) {
705                 libusb_device_descriptor desc;
706                 if (libusb_get_device_descriptor(dev, &desc) < 0) {
707                         fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
708                         libusb_unref_device(dev);
709                         return 1;
710                 }
711
712                 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
713                     (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
714                         card_connected_callback(dev);  // Callback takes ownership.
715                         return 0;
716                 }
717         }
718         libusb_unref_device(dev);
719         return 0;
720 }
721
722 void BMUSBCapture::usb_thread_func()
723 {
724         sched_param param;
725         memset(&param, 0, sizeof(param));
726         param.sched_priority = 1;
727         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
728                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
729         }
730         while (!should_quit) {
731                 int rc = libusb_handle_events(nullptr);
732                 if (rc != LIBUSB_SUCCESS)
733                         break;
734         }
735 }
736
737 struct USBCardDevice {
738         uint16_t product;
739         uint8_t bus, port;
740         libusb_device *device;
741 };
742
// Maps a supported USB product ID to a human-readable product name.
// Only 0xbd3b and 0xbd4f are known; any other value is a programming error
// (asserts, and returns nullptr if asserts are compiled out).
const char *get_product_name(uint16_t product)
{
        switch (product) {
        case 0xbd3b:
                return "Intensity Shuttle";
        case 0xbd4f:
                return "UltraStudio SDI";
        default:
                assert(false);
                return nullptr;
        }
}
754
755 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
756 {
757         const char *product_name = get_product_name(product);
758
759         char buf[256];
760         snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u  %s",
761                 id, bus, port, product_name);
762         return buf;
763 }
764
765 libusb_device_handle *open_card(int card_index, string *description)
766 {
767         libusb_device **devices;
768         ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
769         if (num_devices == -1) {
770                 fprintf(stderr, "Error finding USB devices\n");
771                 exit(1);
772         }
773         vector<USBCardDevice> found_cards;
774         for (ssize_t i = 0; i < num_devices; ++i) {
775                 libusb_device_descriptor desc;
776                 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
777                         fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
778                         exit(1);
779                 }
780
781                 uint8_t bus = libusb_get_bus_number(devices[i]);
782                 uint8_t port = libusb_get_port_number(devices[i]);
783
784                 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
785                     !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
786                         libusb_unref_device(devices[i]);
787                         continue;
788                 }
789
790                 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
791         }
792         libusb_free_device_list(devices, 0);
793
794         // Sort the devices to get a consistent ordering.
795         sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
796                 if (a.product != b.product)
797                         return a.product < b.product;
798                 if (a.bus != b.bus)
799                         return a.bus < b.bus;
800                 return a.port < b.port;
801         });
802
803         for (size_t i = 0; i < found_cards.size(); ++i) {
804                 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
805                 fprintf(stderr, "%s\n", tmp_description.c_str());
806                 if (i == size_t(card_index)) {
807                         *description = tmp_description;
808                 }
809         }
810
811         if (size_t(card_index) >= found_cards.size()) {
812                 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
813                 exit(1);
814         }
815
816         libusb_device_handle *devh;
817         int rc = libusb_open(found_cards[card_index].device, &devh);
818         if (rc < 0) {
819                 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
820                 exit(1);
821         }
822
823         for (size_t i = 0; i < found_cards.size(); ++i) {
824                 libusb_unref_device(found_cards[i].device);
825         }
826
827         return devh;
828 }
829
830 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
831 {
832         uint8_t bus = libusb_get_bus_number(dev);
833         uint8_t port = libusb_get_port_number(dev);
834
835         libusb_device_descriptor desc;
836         if (libusb_get_device_descriptor(dev, &desc) < 0) {
837                 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
838                 exit(1);
839         }
840
841         *description = get_card_description(card_index, bus, port, desc.idProduct);
842
843         libusb_device_handle *devh;
844         int rc = libusb_open(dev, &devh);
845         if (rc < 0) {
846                 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
847                 exit(1);
848         }
849
850         return devh;
851 }
852
853 void BMUSBCapture::configure_card()
854 {
855         if (video_frame_allocator == nullptr) {
856                 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
857                 set_video_frame_allocator(owned_video_frame_allocator.get());
858         }
859         if (audio_frame_allocator == nullptr) {
860                 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
861                 set_audio_frame_allocator(owned_audio_frame_allocator.get());
862         }
863         dequeue_thread_should_quit = false;
864         dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
865
866         int rc;
867         struct libusb_transfer *xfr;
868
869         rc = libusb_init(nullptr);
870         if (rc < 0) {
871                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
872                 exit(1);
873         }
874
875         if (dev == nullptr) {
876                 devh = open_card(card_index, &description);
877         } else {
878                 devh = open_card(card_index, dev, &description);
879                 libusb_unref_device(dev);
880         }
881         if (!devh) {
882                 fprintf(stderr, "Error finding USB device\n");
883                 exit(1);
884         }
885
886         libusb_config_descriptor *config;
887         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
888         if (rc < 0) {
889                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
890                 exit(1);
891         }
892
893 #if 0
894         printf("%d interface\n", config->bNumInterfaces);
895         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
896                 printf("  interface %d\n", interface_number);
897                 const libusb_interface *interface = &config->interface[interface_number];
898                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
899                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
900                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
901                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
902                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
903                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
904                         }
905                 }
906         }
907 #endif
908
909         rc = libusb_set_configuration(devh, /*configuration=*/1);
910         if (rc < 0) {
911                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
912                 exit(1);
913         }
914
915         rc = libusb_claim_interface(devh, 0);
916         if (rc < 0) {
917                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
918                 exit(1);
919         }
920
921         // Alternate setting 1 is output, alternate setting 2 is input.
922         // Card is reset when switching alternates, so the driver uses
923         // this “double switch” when it wants to reset.
924         //
925         // There's also alternate settings 3 and 4, which seem to be
926         // like 1 and 2 except they advertise less bandwidth needed.
927         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
928         if (rc < 0) {
929                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
930                 exit(1);
931         }
932         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
933         if (rc < 0) {
934                 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
935                 exit(1);
936         }
937 #if 0
938         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
939         if (rc < 0) {
940                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
941                 exit(1);
942         }
943 #endif
944
945 #if 0
946         rc = libusb_claim_interface(devh, 3);
947         if (rc < 0) {
948                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
949                 exit(1);
950         }
951 #endif
952
953         // theories:
954         //   44 is some kind of timer register (first 16 bits count upwards)
955         //   24 is some sort of watchdog?
956         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
957         //      (or will go to 0x73c60010?), also seen 0x73c60100
958         //   12 also changes all the time, unclear why  
959         //   16 seems to be autodetected mode somehow
960         //      --    this is e00115e0 after reset?
961         //                    ed0115e0 after mode change [to output?]
962         //                    2d0015e0 after more mode change [to input]
963         //                    ed0115e0 after more mode change
964         //                    2d0015e0 after more mode change
965         //
966         //                    390115e0 seems to indicate we have signal
967         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
968         //
969         //                    200015e0 on startup
970         //         changes to 250115e0 when we sync to the signal
971         //
972         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
973         //
974         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
975         //
976         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
977         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
978         //
979         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
980         //    perhaps some of them are related to analog output?
981         //
982         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
983         //    but the driver sets it to 0x8036802a at some point.
984         //
985         //    all of this is on request 214/215. other requests (192, 219,
986         //    222, 223, 224) are used for firmware upgrade. Probably best to
987         //    stay out of it unless you know what you're doing.
988         //
989         //
990         // register 16:
991         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
992         //
993         // theories:
994         //   0x01 - stable signal
995         //   0x04 - deep color
996         //   0x08 - unknown (audio??)
997         //   0x20 - 720p??
998         //   0x30 - 576p??
999
1000         update_capture_mode();
1001
1002         struct ctrl {
1003                 int endpoint;
1004                 int request;
1005                 int index;
1006                 uint32_t data;
1007         };
1008         static const ctrl ctrls[] = {
1009                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
1010                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
1011
1012                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
1013                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
1014                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
1015                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
1016         };
1017
1018         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
1019                 uint32_t flipped = htonl(ctrls[req].data);
1020                 static uint8_t value[4];
1021                 memcpy(value, &flipped, sizeof(flipped));
1022                 int size = sizeof(value);
1023                 //if (ctrls[req].request == 215) size = 0;
1024                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1025                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1026                 if (rc < 0) {
1027                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
1028                         exit(1);
1029                 }
1030
1031                 if (ctrls[req].index == 16 && rc == 4) {
1032                         printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1033                 }
1034
1035 #if 0
1036                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1037                 for (int i = 0; i < rc; ++i) {
1038                         printf("%02x", value[i]);
1039                 }
1040                 printf("\n");
1041 #endif
1042         }
1043
1044 #if 0
1045         // DEBUG
1046         for ( ;; ) {
1047                 static int my_index = 0;
1048                 static uint8_t value[4];
1049                 int size = sizeof(value);
1050                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1051                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1052                 if (rc < 0) {
1053                         fprintf(stderr, "Error on control\n");
1054                         exit(1);
1055                 }
1056                 printf("rc=%d index=%d: 0x", rc, my_index);
1057                 for (int i = 0; i < rc; ++i) {
1058                         printf("%02x", value[i]);
1059                 }
1060                 printf("\n");
1061         }
1062 #endif
1063
1064 #if 0
1065         // set up an asynchronous transfer of the timer register
1066         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1067         static int completed = 0;
1068
1069         xfr = libusb_alloc_transfer(0);
1070         libusb_fill_control_setup(cmdbuf,
1071             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1072                 /*index=*/44, /*length=*/4);
1073         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1074         xfr->user_data = this;
1075         libusb_submit_transfer(xfr);
1076
1077         // set up an asynchronous transfer of register 24
1078         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1079         static int completed2 = 0;
1080
1081         xfr = libusb_alloc_transfer(0);
1082         libusb_fill_control_setup(cmdbuf2,
1083             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1084                 /*index=*/24, /*length=*/4);
1085         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1086         xfr->user_data = this;
1087         libusb_submit_transfer(xfr);
1088 #endif
1089
1090         // set up an asynchronous transfer of the register dump
1091         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1092         static int completed3 = 0;
1093
1094         xfr = libusb_alloc_transfer(0);
1095         libusb_fill_control_setup(cmdbuf3,
1096             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1097                 /*index=*/current_register, /*length=*/4);
1098         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1099         xfr->user_data = this;
1100         //libusb_submit_transfer(xfr);
1101
1102         //audiofp = fopen("audio.raw", "wb");
1103
1104         // set up isochronous transfers for audio and video
1105         for (int e = 3; e <= 4; ++e) {
1106                 //int num_transfers = (e == 3) ? 6 : 6;
1107                 int num_transfers = 6;
1108                 for (int i = 0; i < num_transfers; ++i) {
1109                         size_t buf_size;
1110                         int num_iso_pack, size;
1111                         if (e == 3) {
1112                                 // Allocate for minimum width (because that will give us the most
1113                                 // number of packets, so we don't need to reallocated, but we'll
1114                                 // default to 720p for the first frame.
1115                                 size = find_xfer_size_for_width(MIN_WIDTH);
1116                                 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1117                                 buf_size = USB_VIDEO_TRANSFER_SIZE;
1118                         } else {
1119                                 size = 0xc0;
1120                                 num_iso_pack = 80;
1121                                 buf_size = num_iso_pack * size;
1122                         }
1123                         int num_bytes = num_iso_pack * size;
1124                         assert(size_t(num_bytes) <= buf_size);
1125 #if LIBUSB_API_VERSION >= 0x01000105
1126                         uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1127 #else
1128                         uint8_t *buf = nullptr;
1129 #endif
1130                         if (buf == nullptr) {
1131                                 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1132 #if LIBUSB_API_VERSION >= 0x01000105
1133                                 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1134 #else
1135                                 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1136 #endif
1137                                 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
1138                                 buf = new uint8_t[num_bytes];
1139                         }
1140
1141                         xfr = libusb_alloc_transfer(num_iso_pack);
1142                         if (!xfr) {
1143                                 fprintf(stderr, "oom\n");
1144                                 exit(1);
1145                         }
1146
1147                         int ep = LIBUSB_ENDPOINT_IN | e;
1148                         libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1149                                 num_iso_pack, cb_xfr, nullptr, 0);
1150                         libusb_set_iso_packet_lengths(xfr, size);
1151                         xfr->user_data = this;
1152
1153                         if (e == 3) {
1154                                 change_xfer_size_for_width(assumed_frame_width, xfr);
1155                         }
1156
1157                         iso_xfrs.push_back(xfr);
1158                 }
1159         }
1160 }
1161
1162 void BMUSBCapture::start_bm_capture()
1163 {
1164         int i = 0;
1165         for (libusb_transfer *xfr : iso_xfrs) {
1166                 int rc = libusb_submit_transfer(xfr);
1167                 ++i;
1168                 if (rc < 0) {
1169                         //printf("num_bytes=%d\n", num_bytes);
1170                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1171                                 xfr->endpoint, i, libusb_error_name(rc));
1172                         exit(1);
1173                 }
1174         }
1175
1176
1177 #if 0
1178         libusb_release_interface(devh, 0);
1179 out:
1180         if (devh)
1181                 libusb_close(devh);
1182         libusb_exit(nullptr);
1183         return rc;
1184 #endif
1185 }
1186
1187 void BMUSBCapture::stop_dequeue_thread()
1188 {
1189         dequeue_thread_should_quit = true;
1190         queues_not_empty.notify_all();
1191         dequeue_thread.join();
1192 }
1193
1194 void BMUSBCapture::start_bm_thread()
1195 {
1196         // Devices leaving are discovered by seeing the isochronous packets
1197         // coming back with errors, so only care about devices joining.
1198         if (card_connected_callback != nullptr) {
1199                 if (libusb_hotplug_register_callback(
1200                         nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, LIBUSB_HOTPLUG_NO_FLAGS,
1201                         USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1202                         &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1203                         fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
1204                         exit(1);
1205                 }
1206         }
1207
1208         should_quit = false;
1209         usb_thread = thread(&BMUSBCapture::usb_thread_func);
1210 }
1211
1212 void BMUSBCapture::stop_bm_thread()
1213 {
1214         should_quit = true;
1215         usb_thread.join();
1216 }
1217
// One row of the known-formats table in decode_video_format().
// NOTE: the table is written with positional aggregate initialization, so
// the member order here must not change.
struct VideoFormatEntry {
        // Format word from the card with the 0xe80c bits cleared; this is
        // the lookup key.
        uint16_t normalized_video_format;

        // Picture size.
        unsigned width;
        unsigned height;

        // Copied to VideoFormat::second_field_start; nonzero only for
        // (some) interlaced entries in the table.
        unsigned second_field_start;

        // Copied to VideoFormat::extra_lines_top/extra_lines_bottom
        // (presumably non-picture lines before/after the active picture).
        unsigned extra_lines_top;
        unsigned extra_lines_bottom;

        // Frame rate as a fraction (e.g. 60000/1001 for 59.94).
        unsigned frame_rate_nom;
        unsigned frame_rate_den;

        bool interlaced;
};
1225
1226 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1227 {
1228         decoded_video_format->id = video_format;
1229         decoded_video_format->interlaced = false;
1230
1231         // TODO: Add these for all formats as we find them.
1232         decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1233
1234         if (video_format == 0x0800) {
1235                 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1236                 // It's a strange thing, but what can you do.
1237                 decoded_video_format->width = 720;
1238                 decoded_video_format->height = 525;
1239                 decoded_video_format->extra_lines_top = 0;
1240                 decoded_video_format->extra_lines_bottom = 0;
1241                 decoded_video_format->frame_rate_nom = 3013;
1242                 decoded_video_format->frame_rate_den = 100;
1243                 decoded_video_format->has_signal = false;
1244                 return true;
1245         }
1246         if ((video_format & 0xe800) != 0xe800) {
1247                 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1248                         video_format);
1249                 decoded_video_format->width = 0;
1250                 decoded_video_format->height = 0;
1251                 decoded_video_format->extra_lines_top = 0;
1252                 decoded_video_format->extra_lines_bottom = 0;
1253                 decoded_video_format->frame_rate_nom = 60;
1254                 decoded_video_format->frame_rate_den = 1;
1255                 decoded_video_format->has_signal = false;
1256                 return false;
1257         }
1258
1259         decoded_video_format->has_signal = true;
1260
1261         // NTSC (480i59.94, I suppose). A special case, see below.
1262         if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1263                 decoded_video_format->width = 720;
1264                 decoded_video_format->height = 480;
1265                 decoded_video_format->extra_lines_top = 17;
1266                 decoded_video_format->extra_lines_bottom = 28;
1267                 decoded_video_format->frame_rate_nom = 30000;
1268                 decoded_video_format->frame_rate_den = 1001;
1269                 decoded_video_format->second_field_start = 280;
1270                 decoded_video_format->interlaced = true;
1271                 return true;
1272         }
1273
1274         // PAL (576i50, I suppose). A special case, see below.
1275         if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1276                 decoded_video_format->width = 720;
1277                 decoded_video_format->height = 576;
1278                 decoded_video_format->extra_lines_top = 22;
1279                 decoded_video_format->extra_lines_bottom = 27;
1280                 decoded_video_format->frame_rate_nom = 25;
1281                 decoded_video_format->frame_rate_den = 1;
1282                 decoded_video_format->second_field_start = 335;
1283                 decoded_video_format->interlaced = true;
1284                 return true;
1285         }
1286
1287         // 0x8 seems to be a flag about availability of deep color on the input,
1288         // except when it's not (e.g. it's the only difference between NTSC
1289         // and PAL). Rather confusing. But we clear it here nevertheless, because
1290         // usually it doesn't mean anything.
1291         //
1292         // 0x4 is a flag I've only seen from the D4. I don't know what it is.
1293         uint16_t normalized_video_format = video_format & ~0xe80c;
1294         constexpr VideoFormatEntry entries[] = {
1295                 { 0x01f1,  720,  480,   0, 40,  5, 60000, 1001, false },  // 480p59.94 (believed).
1296                 { 0x0131,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50.
1297                 { 0x0011,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50 (5:4).
1298                 { 0x0143, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
1299                 { 0x0103, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1300                 { 0x0125, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
1301                 { 0x0121, 1280,  720,   0, 25,  5, 60000, 1001, false },  // 720p59.94.
1302                 { 0x01c3, 1920, 1080,   0,  0,  0,    30,    1, false },  // 1080p30.
1303                 { 0x0003, 1920, 1080, 583, 20, 25,    30,    1,  true },  // 1080i60.
1304                 { 0x01e1, 1920, 1080,   0,  0,  0, 30000, 1001, false },  // 1080p29.97.
1305                 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001,  true },  // 1080i59.94.
1306                 { 0x0063, 1920, 1080,   0,  0,  0,    25,    1, false },  // 1080p25.
1307                 { 0x0043, 1920, 1080,   0,  0,  0,    25,    1,  true },  // 1080p50.
1308                 { 0x008e, 1920, 1080,   0,  0,  0,    24,    1, false },  // 1080p24.
1309                 { 0x00a1, 1920, 1080,   0,  0,  0, 24000, 1001, false },  // 1080p23.98.
1310         };
1311         for (const VideoFormatEntry &entry : entries) {
1312                 if (normalized_video_format == entry.normalized_video_format) {
1313                         decoded_video_format->width = entry.width;
1314                         decoded_video_format->height = entry.height;
1315                         decoded_video_format->second_field_start = entry.second_field_start;
1316                         decoded_video_format->extra_lines_top = entry.extra_lines_top;
1317                         decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1318                         decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1319                         decoded_video_format->frame_rate_den = entry.frame_rate_den;
1320                         decoded_video_format->interlaced = entry.interlaced;
1321                         return true;
1322                 }
1323         }
1324
1325         printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1326         decoded_video_format->width = 1280;
1327         decoded_video_format->height = 720;
1328         decoded_video_format->frame_rate_nom = 60;
1329         decoded_video_format->frame_rate_den = 1;
1330         return false;
1331 }
1332
1333 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1334 {
1335         // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1336         VideoMode auto_mode;
1337         auto_mode.name = "Autodetect";
1338         auto_mode.autodetect = true;
1339         return {{ 0, auto_mode }};
1340 }
1341
1342 uint32_t BMUSBCapture::get_current_video_mode() const
1343 {
1344         return 0;  // Matches get_available_video_modes().
1345 }
1346
1347 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1348 {
1349         assert(video_mode_id == 0);  // Matches get_available_video_modes().
1350 }
1351
1352 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1353 {
1354         return {
1355                 { 0x00000000, "HDMI/SDI" },
1356                 { 0x02000000, "Component" },
1357                 { 0x04000000, "Composite" },
1358                 { 0x06000000, "S-video" }
1359         };
1360 }
1361
1362 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1363 {
1364         assert((video_input_id & ~0x06000000) == 0);
1365         current_video_input = video_input_id;
1366         update_capture_mode();
1367 }
1368
1369 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1370 {
1371         return {
1372                 { 0x00000000, "Embedded" },
1373                 { 0x10000000, "Analog" }
1374         };
1375 }
1376
1377 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1378 {
1379         assert((audio_input_id & ~0x10000000) == 0);
1380         current_audio_input = audio_input_id;
1381         update_capture_mode();
1382 }
1383
1384 void BMUSBCapture::update_capture_mode()
1385 {
1386         // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
1387         // clearing the 0x08000000 bit seems to change the capture format (other source?)
1388         uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
1389
1390         int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1391                 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1392         if (rc < 0) {
1393                 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1394                 exit(1);
1395         }
1396 }