1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
35 using namespace std::placeholders;
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
42 #define FRAME_SIZE (8 << 20) // 8 MB.
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
50 atomic<bool> should_quit;
// Computes the isochronous packet size to use for a given frame width.
// The starting point is width * 2 * 6 bytes; sizes that are not a multiple
// of 1024 are then special-cased (the existing comment says the size is
// rounded up to the required 1 kB granularity).
52 int find_xfer_size_for_width(int width)
54 // Video seems to require isochronous packets scaled with the width;
55 // seemingly six lines is about right, rounded up to the required 1kB
57 int size = width * 2 * 6;
58 // Note that for 10-bit input, you'll need to increase size accordingly.
59 //size = size * 4 / 3;
60 if (size % 1024 != 0) {
// Re-sizes the isochronous packets of an existing transfer to match <width>.
// The per-packet size comes from find_xfer_size_for_width(), and the packet
// count is however many such packets fit in xfr->length. The transfer is only
// modified when the count or the per-packet length actually differs.
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
69 assert(width >= MIN_WIDTH);
70 size_t size = find_xfer_size_for_width(width);
// Integer division: a remainder of the buffer that cannot hold a full packet is left unused.
71 int num_iso_pack = xfr->length / size;
// Packet 0's length is compared against the new size to detect a change.
72 if (num_iso_pack != xfr->num_iso_packets ||
73 size != xfr->iso_packet_desc[0].length) {
74 xfr->num_iso_packets = num_iso_pack;
75 libusb_set_iso_packet_lengths(xfr, size);
// Out-of-line definition of the (empty) FrameAllocator destructor.
81 FrameAllocator::~FrameAllocator() {}
83 // Audio is more important than video, and also much cheaper.
84 // By having many more audio frames available, hopefully if something
85 // starts to drop, we'll have CPU load go down (from not having to
86 // process as much video) before we have to drop audio.
87 #define NUM_QUEUED_VIDEO_FRAMES 16
88 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator implementation that hands out buffers from a freelist;
// the constructor fills the freelist, alloc_frame() takes a buffer from it
// and release_frame() puts one back.
90 class MallocFrameAllocator : public FrameAllocator {
92 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
93 Frame alloc_frame() override;
94 void release_frame(Frame frame) override;
// Buffers that are currently available to alloc_frame().
100 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Allocates <num_queued_frames> buffers of <frame_size> bytes each up front
// and pushes them all onto the freelist.
103 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
104 : frame_size(frame_size)
106 for (size_t i = 0; i < num_queued_frames; ++i) {
107 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Takes one buffer off the freelist (under freelist_mutex) and hands it out as
// a frame of size <frame_size>. When the freelist is empty, a "Frame overrun"
// message is printed instead.
// NOTE(review): the overrun message formats a size with %ld; if the argument
// is the size_t frame_size, %zu would be the matching conversion -- confirm.
111 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
116 unique_lock<mutex> lock(freelist_mutex); // Meh.
117 if (freelist.empty()) {
118 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the buffer from its unique_ptr, so vf.data is a raw owning
// pointer from here on; release_frame() wraps it in a unique_ptr again.
121 vf.data = freelist.top().release();
122 vf.size = frame_size;
123 freelist.pop(); // Meh.
// Puts a frame's buffer back on the freelist (under freelist_mutex).
// A nonzero frame.overflow is reported on stdout first.
128 void MallocFrameAllocator::release_frame(Frame frame)
130 if (frame.overflow > 0) {
131 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
133 unique_lock<mutex> lock(freelist_mutex);
// Re-wraps the raw pointer so the freelist owns the buffer again.
134 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for two 16-bit counters that may have wrapped around.
// Both visible return paths check whether the forward distance from a to b
// is below 0x8000 (half the 16-bit range); the second one first adds 0x10000
// to b so that the subtraction works when b has wrapped past a.
137 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
142 return (b - a < 0x8000);
144 int wrap_b = 0x10000 + int(b);
145 return (wrap_b - a < 0x8000);
// Appends a frame to the queue <q> while holding queue_lock, then wakes up a
// waiter on queues_not_empty.
// If the queue's last entry does not have a timecode strictly before
// <timecode> (per uint16_less_than_with_wraparound), the frame is reported as
// going backwards and handed back to its owning allocator.
149 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
151 unique_lock<mutex> lock(queue_lock);
152 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
153 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
154 q->back().timecode, timecode);
155 frame.owner->release_frame(frame);
161 qf.timecode = timecode;
163 q->push_back(move(qf));
164 queues_not_empty.notify_one(); // might be spurious
// Writes a frame's payload (everything after the HEADER_SIZE-byte header) to
// <filename> with a single fwrite(); prints "short write!" if that write does
// not complete.
// NOTE(review): the fopen() result is passed to fwrite() without a null check,
// and frame_len - HEADER_SIZE wraps around (size_t) if frame_len < HEADER_SIZE.
167 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
169 FILE *fp = fopen(filename, "wb");
170 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
171 printf("short write!\n");
// Writes an audio block, minus its AUDIO_HEADER_SIZE-byte header, to the
// audiofp stream. The fwrite() result is not checked.
176 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
178 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up queued video and audio frames by
// timecode and passes them on through frame_callback.
//
// Each iteration waits until both queues are non-empty (or a quit has been
// requested) and then compares the timecodes at the front of the two queues:
//  - video before audio: the video frame is popped and released back to
//    video_frame_allocator;
//  - audio before video: the audio frame is popped and delivered together
//    with a default-constructed (blank) video frame;
//  - otherwise: both frames are popped and delivered together.
// When has_dequeue_callbacks is set, dequeue_init_callback() runs before the
// loop and dequeue_cleanup_callback() after it.
181 void BMUSBCapture::dequeue_thread_func()
183 if (has_dequeue_callbacks) {
184 dequeue_init_callback();
186 while (!dequeue_thread_should_quit) {
187 unique_lock<mutex> lock(queue_lock);
// Sleep until there is at least one frame in *both* queues, or we are asked to quit.
188 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
190 if (dequeue_thread_should_quit) break;
192 uint16_t video_timecode = pending_video_frames.front().timecode;
193 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// The audio format is fixed at 8 channels of 24-bit samples; only its id is taken from the frame.
194 AudioFormat audio_format;
195 audio_format.bits_per_sample = 24;
196 audio_format.num_channels = 8;
197 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
198 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
200 QueuedFrame video_frame = pending_video_frames.front();
201 pending_video_frames.pop_front();
203 video_frame_allocator->release_frame(video_frame.frame);
204 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
205 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
207 QueuedFrame audio_frame = pending_audio_frames.front();
208 pending_audio_frames.pop_front();
210 audio_format.id = audio_frame.format;
211 frame_callback(audio_timecode,
212 FrameAllocator::Frame(), 0, VideoFormat(),
213 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Remaining case: neither timecode is before the other, so the two frames belong together.
215 QueuedFrame video_frame = pending_video_frames.front();
216 QueuedFrame audio_frame = pending_audio_frames.front();
217 pending_audio_frames.pop_front();
218 pending_video_frames.pop_front();
// NOTE(review): the dump calls below look like debugging aids (presumably compiled out) -- confirm.
223 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
224 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
225 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
228 VideoFormat video_format;
229 audio_format.id = audio_frame.format;
// A video frame is only passed on if its format could be decoded; the second
// call delivers the audio with a blank video frame instead.
230 if (decode_video_format(video_frame.format, &video_format)) {
231 frame_callback(video_timecode,
232 video_frame.frame, HEADER_SIZE, video_format,
233 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
235 frame_callback(video_timecode,
236 FrameAllocator::Frame(), 0, video_format,
237 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
241 if (has_dequeue_callbacks) {
242 dequeue_cleanup_callback();
// Called by decode_packs() when a video sync marker has been found; <start>
// points at the first byte after the marker.
// Reads a 16-bit little-endian timecode (bytes 0-1) and format (bytes 2-3),
// queues the video frame that was being collected so far (if it holds any
// data) and then allocates a fresh current_video_frame.
// Note that the format and timecode parsed here are the ones passed along when
// queueing the frame collected *before* this marker.
246 void BMUSBCapture::start_new_frame(const uint8_t *start)
248 uint16_t format = (start[3] << 8) | start[2];
249 uint16_t timecode = (start[1] << 8) | start[0];
251 if (current_video_frame.len > 0) {
252 // If format is 0x0800 (no signal), add a fake (empty) audio
253 // frame to get it out of the queue.
254 // TODO: Figure out if there are other formats that come with
255 // no audio, and treat them the same.
256 if (format == 0x0800) {
257 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
258 if (fake_audio_frame.data == nullptr) {
259 // Oh well, it's just a no-signal frame anyway.
260 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
// Hand the collected video frame back to its allocator and start over with a new one.
261 current_video_frame.owner->release_frame(current_video_frame);
262 current_video_frame = video_frame_allocator->alloc_frame();
265 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
268 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
270 // Update the assumed frame width. We might be one frame too late on format changes,
271 // but it's much better than asking the user to choose manually.
272 VideoFormat video_format;
273 if (decode_video_format(format, &video_format)) {
274 assumed_frame_width = video_format.width;
277 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
279 // //start[7], start[6], start[5], start[4],
280 // read_current_frame, FRAME_SIZE);
282 current_video_frame = video_frame_allocator->alloc_frame();
283 //if (current_video_frame.data == nullptr) {
284 // read_current_frame = -1;
286 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called by decode_packs() with
// <start> pointing at the first byte after an audio sync marker.
// Reads the little-endian timecode (bytes 0-1) and format (bytes 2-3), queues
// the audio frame collected so far if it holds any data, and allocates a new
// current_audio_frame.
290 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
292 uint16_t format = (start[3] << 8) | start[2];
293 uint16_t timecode = (start[1] << 8) | start[0];
294 if (current_audio_frame.len > 0) {
295 //dump_audio_block();
296 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
298 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
299 // format, timecode, read_current_audio_block);
300 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex,
// reading pack->actual_length bytes from xfr->buffer starting at <offset>.
304 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
306 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
307 for (unsigned j = 0; j < pack->actual_length; j++) {
308 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
309 printf("%02x", xfr->buffer[j + offset]);
312 else if ((j % 8) == 7)
// Copies <n> bytes from <src> into the two destinations <dest1> and <dest2>,
// consuming the source two bytes per loop iteration.
// NOTE(review): presumably even-indexed bytes go to dest1 and odd-indexed bytes
// to dest2 (add_to_frame() advances each destination by half the length) -- confirm.
320 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
323 uint8_t *dptr1 = dest1;
324 uint8_t *dptr2 = dest2;
326 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to <current_frame> (the slow, general path).
// Data that would not fit is counted in current_frame->overflow and the
// frame's len is pinned at its size. For interleaved frames the bytes are
// split between data and data2 via memcpy_interleaved(); otherwise they are
// copied to data + len with a plain memcpy().
// <frame_type_name> is only used in the overflow message.
332 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard against frames that cannot accept data (no buffer, or len already past size).
334 if (current_frame->data == nullptr ||
335 current_frame->len > current_frame->size ||
340 int bytes = end - start;
341 if (current_frame->len + bytes > current_frame->size) {
342 current_frame->overflow = current_frame->len + bytes - current_frame->size;
343 current_frame->len = current_frame->size;
// Only overflows of more than 1 MiB (1048576 bytes) are reported here, and the counter is then reset.
344 if (current_frame->overflow > 1048576) {
345 printf("%d bytes overflow after last %s frame\n",
346 int(current_frame->overflow), frame_type_name);
347 current_frame->overflow = 0;
351 if (current_frame->interleaved) {
// Each destination has received len / 2 bytes so far.
352 uint8_t *data = current_frame->data + current_frame->len / 2;
353 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length and odd byte count are special-cased before the bulk copy.
354 if (current_frame->len % 2 == 1) {
358 if (bytes % 2 == 1) {
361 ++current_frame->len;
364 memcpy_interleaved(data, data2, start, bytes);
365 current_frame->len += bytes;
367 memcpy(current_frame->data + current_frame->len, start, bytes);
368 current_frame->len += bytes;
// Debug helper: prints <name> (left-justified in a 10-character field)
// followed by each of the 32 bytes of <n> in hex, lowest byte index first.
// The extraction is written out one call per byte, with a literal index each time.
376 void avx2_dump(const char *name, __m256i n)
378 printf("%-10s:", name);
379 printf(" %02x", _mm256_extract_epi8(n, 0));
380 printf(" %02x", _mm256_extract_epi8(n, 1));
381 printf(" %02x", _mm256_extract_epi8(n, 2));
382 printf(" %02x", _mm256_extract_epi8(n, 3));
383 printf(" %02x", _mm256_extract_epi8(n, 4));
384 printf(" %02x", _mm256_extract_epi8(n, 5));
385 printf(" %02x", _mm256_extract_epi8(n, 6));
386 printf(" %02x", _mm256_extract_epi8(n, 7));
388 printf(" %02x", _mm256_extract_epi8(n, 8));
389 printf(" %02x", _mm256_extract_epi8(n, 9));
390 printf(" %02x", _mm256_extract_epi8(n, 10));
391 printf(" %02x", _mm256_extract_epi8(n, 11));
392 printf(" %02x", _mm256_extract_epi8(n, 12));
393 printf(" %02x", _mm256_extract_epi8(n, 13));
394 printf(" %02x", _mm256_extract_epi8(n, 14));
395 printf(" %02x", _mm256_extract_epi8(n, 15));
397 printf(" %02x", _mm256_extract_epi8(n, 16));
398 printf(" %02x", _mm256_extract_epi8(n, 17));
399 printf(" %02x", _mm256_extract_epi8(n, 18));
400 printf(" %02x", _mm256_extract_epi8(n, 19));
401 printf(" %02x", _mm256_extract_epi8(n, 20));
402 printf(" %02x", _mm256_extract_epi8(n, 21));
403 printf(" %02x", _mm256_extract_epi8(n, 22));
404 printf(" %02x", _mm256_extract_epi8(n, 23));
406 printf(" %02x", _mm256_extract_epi8(n, 24));
407 printf(" %02x", _mm256_extract_epi8(n, 25));
408 printf(" %02x", _mm256_extract_epi8(n, 26));
409 printf(" %02x", _mm256_extract_epi8(n, 27));
410 printf(" %02x", _mm256_extract_epi8(n, 28));
411 printf(" %02x", _mm256_extract_epi8(n, 29));
412 printf(" %02x", _mm256_extract_epi8(n, 30));
413 printf(" %02x", _mm256_extract_epi8(n, 31));
418 // Does a memcpy and memchr in one to reduce processing time.
419 // Note that the benefit is somewhat limited if your L3 cache is small,
420 // as you'll (unfortunately) spend most of the time loading the data
423 // Complicated cases are left to the slow path; it basically stops copying
424 // up until the first instance of "sync_char" (usually a bit before, actually).
425 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
426 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// SIMD fast path for appending [start, limit) to <current_frame>.
//
// Steps, in order:
//  1. Bail out on an unusable frame or on fewer than 128 input bytes.
//  2. Clamp <limit> to the space left in the frame and round it down to a
//     32-byte boundary.
//  3. Copy the unaligned head (up to 31 bytes) through add_to_frame(),
//     stopping at <sync_char> if memchr() finds one there.
//  4. Copy whole vectors, checking each for <sync_char>, until one contains
//     it or <limit> is reached.
// The final return hands back the input position reached; decode_packs()
// continues from there with the slow path.
//
// NOTE(review): two variants are visible below, one on __m256i and one on
// __m128i, each declaring its own `needle` and `in`; presumably a preprocessor
// conditional selects between them -- confirm.
427 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
429 if (current_frame->data == nullptr ||
430 current_frame->len > current_frame->size ||
434 size_t orig_bytes = limit - start;
// Inputs under 128 bytes are special-cased (presumably left to the slow path).
435 if (orig_bytes < 128) {
440 // Don't read more bytes than we can write.
441 limit = min(limit, start + (current_frame->size - current_frame->len));
443 // Align end to 32 bytes.
444 limit = (const uint8_t *)(intptr_t(limit) & ~31);
446 if (start >= limit) {
450 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
451 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
452 if (aligned_start != start) {
453 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
454 if (sync_start == nullptr) {
455 add_to_frame(current_frame, "", start, aligned_start);
457 add_to_frame(current_frame, "", start, sync_start);
462 // Make the length a multiple of 64.
// (The interleaved AVX2 loop below reads two 32-byte vectors per iteration.)
463 if (current_frame->interleaved) {
464 if (((limit - aligned_start) % 64) != 0) {
467 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit (__m256i) variant ----
// Every byte of the needle is sync_char, so one byte-wise compare tests a whole vector.
471 const __m256i needle = _mm256_set1_epi8(sync_char);
473 const __restrict __m256i *in = (const __m256i *)aligned_start;
474 if (current_frame->interleaved) {
// With an odd len, the first destination has received one byte more than the second.
475 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
476 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
477 if (current_frame->len % 2 == 1) {
// Shuffle control: within each 128-bit lane, gathers the even-indexed bytes
// into the low half and the odd-indexed bytes into the high half.
481 __m256i shuffle_cw = _mm256_set_epi8(
482 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
483 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
484 while (in < (const __m256i *)limit) {
485 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
486 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
487 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
489 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
490 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
491 __m256i found = _mm256_or_si256(found1, found2);
493 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
494 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
496 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
497 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo takes the low 128 bits of data1 and data2, hi takes their high 128 bits.
499 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
500 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
502 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
503 _mm256_storeu_si256(out2, hi);
// Nonzero `found` means at least one byte in this 64-byte chunk equals sync_char.
505 if (!_mm256_testz_si256(found, found)) {
513 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight vector copy with the same sync_char check.
515 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
516 while (in < (const __m256i *)limit) {
517 __m256i data = _mm256_load_si256(in);
518 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
519 __m256i found = _mm256_cmpeq_epi8(data, needle);
520 if (!_mm256_testz_si256(found, found)) {
527 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit (__m128i) variant ----
530 const __m128i needle = _mm_set1_epi8(sync_char);
532 const __m128i *in = (const __m128i *)aligned_start;
533 if (current_frame->interleaved) {
534 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
535 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
536 if (current_frame->len % 2 == 1) {
// De-interleave by masking/shifting each 16-bit pair and packing the results:
// the low bytes of every pair go to out1, the high bytes to out2.
540 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
541 while (in < (const __m128i *)limit) {
542 __m128i data1 = _mm_load_si128(in);
543 __m128i data2 = _mm_load_si128(in + 1);
544 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
545 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
546 __m128i data1_hi = _mm_srli_epi16(data1, 8);
547 __m128i data2_hi = _mm_srli_epi16(data2, 8);
548 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
549 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
550 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
551 _mm_storeu_si128(out2, hi);
552 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
553 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
554 if (!_mm_testz_si128(found1, found1) ||
555 !_mm_testz_si128(found2, found2)) {
563 current_frame->len += (uint8_t *)in - aligned_start;
565 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
566 while (in < (const __m128i *)limit) {
567 __m128i data = _mm_load_si128(in);
568 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
569 __m128i found = _mm_cmpeq_epi8(data, needle);
570 if (!_mm_testz_si128(found, found)) {
577 current_frame->len = (uint8_t *)out - current_frame->data;
581 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Input position reached by the vector loops.
583 return (const uint8_t *)in;
// Walks every isochronous packet of a transfer and feeds its payload into
// <current_frame>, splitting the stream at each occurrence of <sync_pattern>.
//
// For each packet, add_to_frame_fastpath() is tried first (keyed on the first
// byte of the sync pattern); whatever it leaves is searched with memmem().
// When a marker is found, the data before it is appended, the marker is
// skipped, and start_callback is invoked with a pointer to the byte right
// after it. Without a marker, the rest of the packet is appended as-is.
// Packets whose status is not LIBUSB_TRANSFER_COMPLETED are reported on stderr.
587 void decode_packs(const libusb_transfer *xfr,
588 const char *sync_pattern,
590 FrameAllocator::Frame *current_frame,
591 const char *frame_type_name,
592 function<void(const uint8_t *start)> start_callback)
595 for (int i = 0; i < xfr->num_iso_packets; i++) {
596 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
598 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
599 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet's slot hold received data.
604 const uint8_t *start = xfr->buffer + offset;
605 const uint8_t *limit = start + pack->actual_length;
606 while (start < limit) { // Usually runs only one iteration.
608 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
609 if (start == limit) break;
610 assert(start < limit);
613 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
614 if (start_next_frame == nullptr) {
615 // add the rest of the buffer
616 add_to_frame(current_frame, frame_type_name, start, limit);
619 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
620 start = start_next_frame + sync_length; // skip sync
621 start_callback(start);
625 dump_pack(xfr, offset, pack);
// Advance by the slot size (pack->length), not by the number of bytes received.
627 offset += pack->length;
// libusb completion callback used for both the isochronous (audio/video) and
// the control transfers set up in configure_card().
//
// Isochronous transfers are decoded with decode_packs(): endpoint 0x84 is
// treated as audio (sync pattern "DeckLinkAudioResyncT"), and the other branch
// as video (sync pattern 00 00 ff ff). Control transfers read registers into
// register_file. Towards the end, the transfer is submitted again.
// A transfer whose status is not LIBUSB_TRANSFER_COMPLETED is reported on
// stderr and freed.
631 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
633 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
634 fprintf(stderr, "transfer status %d\n", xfr->status);
635 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture (set to `this` in configure_card()).
639 assert(xfr->user_data != nullptr);
640 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
642 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
643 if (xfr->endpoint == 0x84) {
644 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
646 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
648 // Update the transfer with the new assumed width, if we're in the process of changing formats.
649 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
652 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
653 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
654 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is only declared in the commented-out line above, so the
// uses below are presumably compiled out -- confirm.
656 if (setup->wIndex == 44) {
657 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
659 printf("read register %2d: 0x%02x%02x%02x%02x\n",
660 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and step to the next register, wrapping at NUM_BMUSB_REGISTERS.
663 memcpy(usb->register_file + usb->current_register, buf, 4);
664 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
665 if (usb->current_register == 0) {
666 // read through all of them
667 printf("register dump:");
668 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
669 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the same transfer buffer to read the next register.
673 libusb_fill_control_setup(xfr->buffer,
674 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
675 /*index=*/usb->current_register, /*length=*/4);
680 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
681 for (i = 0; i < xfr->actual_length; i++) {
682 printf("%02x", xfr->buffer[i]);
692 int rc = libusb_submit_transfer(xfr);
694 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Event loop of the USB thread: tries to switch the calling thread to the
// SCHED_RR scheduling policy with priority 1 (printing a message if that
// fails), then calls libusb_handle_events() until should_quit is set.
// NOTE(review): `¶m` in the two calls below looks like a mis-decoded `&param`
// (the variable is used as `param` elsewhere) -- verify the file's encoding.
699 void BMUSBCapture::usb_thread_func()
702 memset(¶m, 0, sizeof(param));
703 param.sched_priority = 1;
704 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
705 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
707 while (!should_quit) {
708 int rc = libusb_handle_events(nullptr);
709 if (rc != LIBUSB_SUCCESS)
// One supported card found during enumeration. open_card() initializes it as
// { product id, bus number, port number, device } and reads the fields
// product, bus, port and device.
714 struct USBCardDevice {
717 libusb_device *device;
// Enumerates all USB devices, keeps the ones with vendor 0x1edb and product
// 0xbd3b ("Intensity Shuttle") or 0xbd4f ("UltraStudio SDI"), sorts them into a
// stable order (product, then bus, then port), lists them on stderr and opens
// the one at position <card_index>.
// An out-of-range <card_index> and a failing libusb_open() are reported on
// stderr.
// NOTE(review): <description> is presumably filled with the selected card's
// text line -- the assignment is not among the visible lines; confirm.
720 libusb_device_handle *open_card(int card_index, string *description)
722 libusb_device **devices;
723 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
724 if (num_devices == -1) {
725 fprintf(stderr, "Error finding USB devices\n");
728 vector<USBCardDevice> found_cards;
729 for (ssize_t i = 0; i < num_devices; ++i) {
730 libusb_device_descriptor desc;
731 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
732 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
736 uint8_t bus = libusb_get_bus_number(devices[i]);
737 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are unreferenced right away.
739 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
740 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
741 libusb_unref_device(devices[i]);
745 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// The list is freed with unref_devices = 0: non-matching devices were unreferenced
// above, and the kept ones are unreferenced in the final loop of this function.
747 libusb_free_device_list(devices, 0);
749 // Sort the devices to get a consistent ordering.
750 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
751 if (a.product != b.product)
752 return a.product < b.product;
754 return a.bus < b.bus;
755 return a.port < b.port;
758 for (size_t i = 0; i < found_cards.size(); ++i) {
759 const char *product_name = nullptr;
760 if (found_cards[i].product == 0xbd3b) {
761 product_name = "Intensity Shuttle";
762 } else if (found_cards[i].product == 0xbd4f) {
763 product_name = "UltraStudio SDI";
// Note that the number printed after "Device" is the port number stored in found_cards[i].port.
769 snprintf(buf, sizeof(buf), "Card %d: Bus %03u Device %03u %s",
770 int(i), found_cards[i].bus, found_cards[i].port, product_name);
771 if (i == size_t(card_index)) {
774 fprintf(stderr, "%s\n", buf);
777 if (size_t(card_index) >= found_cards.size()) {
778 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
782 libusb_device_handle *devh;
783 int rc = libusb_open(found_cards[card_index].device, &devh);
785 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references kept on the matching devices.
789 for (size_t i = 0; i < found_cards.size(); ++i) {
790 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card. In order, the visible code:
//  1. installs default MallocFrameAllocators for video and audio if none are set;
//  2. starts the dequeue thread;
//  3. initializes libusb and opens the card selected by card_index;
//  4. prints the interface/alternate-setting/endpoint layout of configuration 0;
//  5. selects configuration 1, claims interface 0, switches its alternate
//     setting (1, 2, then 1 again) and claims interface 3;
//  6. runs the vendor control requests in ctrls[] (register reads via request
//     214, register writes via request 215);
//  7. prepares asynchronous control transfers for a few registers;
//  8. allocates the isochronous transfers for endpoints 3 and 4 and stores them
//     in iso_xfrs (they are submitted later, by start_bm_capture()).
// Every libusb failure along the way is reported on stderr.
796 void BMUSBCapture::configure_card()
798 if (video_frame_allocator == nullptr) {
799 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
801 if (audio_frame_allocator == nullptr) {
802 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// Note that the dequeue thread is started before any of the USB setup below has run.
804 dequeue_thread_should_quit = false;
805 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
808 struct libusb_transfer *xfr;
810 rc = libusb_init(nullptr);
812 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
816 libusb_device_handle *devh = open_card(card_index, &description);
818 fprintf(stderr, "Error finding USB device\n");
822 libusb_config_descriptor *config;
823 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
825 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of the descriptor tree: interfaces, their alternate settings, and endpoints.
830 printf("%d interface\n", config->bNumInterfaces);
831 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
832 printf(" interface %d\n", interface_number);
833 const libusb_interface *interface = &config->interface[interface_number];
834 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
835 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
836 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
837 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
838 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
839 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
845 rc = libusb_set_configuration(devh, /*configuration=*/1);
847 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
851 rc = libusb_claim_interface(devh, 0);
853 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
857 // Alternate setting 1 is output, alternate setting 2 is input.
858 // Card is reset when switching alternates, so the driver uses
859 // this “double switch” when it wants to reset.
861 // There's also alternate settings 3 and 4, which seem to be
862 // like 1 and 2 except they advertise less bandwidth needed.
863 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
865 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
868 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
870 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
874 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
876 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
882 rc = libusb_claim_interface(devh, 3);
884 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
890 // 44 is some kind of timer register (first 16 bits count upwards)
891 // 24 is some sort of watchdog?
892 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
893 // (or will go to 0x73c60010?), also seen 0x73c60100
894 // 12 also changes all the time, unclear why
895 // 16 seems to be autodetected mode somehow
896 // -- this is e00115e0 after reset?
897 // ed0115e0 after mode change [to output?]
898 // 2d0015e0 after more mode change [to input]
899 // ed0115e0 after more mode change
900 // 2d0015e0 after more mode change
902 // 390115e0 seems to indicate we have signal
903 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
905 // 200015e0 on startup
906 // changes to 250115e0 when we sync to the signal
908 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
910 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
912 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
913 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
915 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
916 // perhaps some of them are related to analog output?
918 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
919 // but the driver sets it to 0x8036802a at some point.
921 // all of this is on request 214/215. other requests (192, 219,
922 // 222, 223, 224) are used for firmware upgrade. Probably best to
923 // stay out of it unless you know what you're doing.
927 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
930 // 0x01 - stable signal
932 // 0x08 - unknown (audio??)
// Table of control requests issued in order; as used by the loop below, each
// entry is { direction, request, register index, data }.
942 static const ctrl ctrls[] = {
943 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
944 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
946 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
948 // clearing the 0x08000000 bit seems to change the capture format (other source?)
949 // 0x10000000 = analog audio instead of embedded audio, it seems
950 // 0x3a000000 = component video? (analog audio)
951 // 0x3c000000 = composite video? (analog audio)
952 // 0x3e000000 = s-video? (analog audio)
953 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
954 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
955 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
956 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
957 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
960 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// htonl() puts the 32-bit value in big-endian byte order before it is copied into the request buffer.
961 uint32_t flipped = htonl(ctrls[req].data);
962 static uint8_t value[4];
963 memcpy(value, &flipped, sizeof(flipped));
964 int size = sizeof(value);
965 //if (ctrls[req].request == 215) size = 0;
966 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
967 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
969 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte reply from register 16 is printed as the firmware version (its last two bytes).
973 if (ctrls[req].index == 16 && rc == 4) {
974 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
978 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
979 for (int i = 0; i < rc; ++i) {
980 printf("%02x", value[i]);
// Synchronous read of the register at my_index, printed as hex.
989 static int my_index = 0;
990 static uint8_t value[4];
991 int size = sizeof(value);
992 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
993 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
995 fprintf(stderr, "Error on control\n");
998 printf("rc=%d index=%d: 0x", rc, my_index);
999 for (int i = 0; i < rc; ++i) {
1000 printf("%02x", value[i]);
// In each of the three blocks below, libusb_fill_control_transfer() first stores
// &completedN as user_data, which is then overwritten with `this` (cb_xfr()
// casts user_data to BMUSBCapture *).
1007 // set up an asynchronous transfer of the timer register
1008 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1009 static int completed = 0;
1011 xfr = libusb_alloc_transfer(0);
1012 libusb_fill_control_setup(cmdbuf,
1013 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1014 /*index=*/44, /*length=*/4);
1015 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1016 xfr->user_data = this;
1017 libusb_submit_transfer(xfr);
1019 // set up an asynchronous transfer of register 24
1020 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1021 static int completed2 = 0;
1023 xfr = libusb_alloc_transfer(0);
1024 libusb_fill_control_setup(cmdbuf2,
1025 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1026 /*index=*/24, /*length=*/4);
1027 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1028 xfr->user_data = this;
1029 libusb_submit_transfer(xfr);
1032 // set up an asynchronous transfer of the register dump
1033 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1034 static int completed3 = 0;
1036 xfr = libusb_alloc_transfer(0);
1037 libusb_fill_control_setup(cmdbuf3,
1038 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1039 /*index=*/current_register, /*length=*/4);
1040 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1041 xfr->user_data = this;
// The submit call for this third transfer is commented out, so it is prepared but not started here.
1042 //libusb_submit_transfer(xfr);
// audiofp is the stream dump_audio_block() writes to.
// NOTE(review): no check of the fopen() result is visible here -- confirm.
1044 audiofp = fopen("audio.raw", "wb");
1046 // set up isochronous transfers for audio and video
// Ten transfers are prepared for each of endpoints 3 and 4.
1047 for (int e = 3; e <= 4; ++e) {
1048 //int num_transfers = (e == 3) ? 6 : 6;
1049 int num_transfers = 10;
1050 for (int i = 0; i < num_transfers; ++i) {
1052 int num_iso_pack, size;
1054 // Allocate for minimum width (because that will give us the most
1055 // number of packets, so we don't need to reallocate), but we'll
1056 // default to 720p for the first frame.
1057 size = find_xfer_size_for_width(MIN_WIDTH);
1058 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1059 buf_size = USB_VIDEO_TRANSFER_SIZE;
1063 buf_size = num_iso_pack * size;
1065 assert(size_t(num_iso_pack * size) <= buf_size);
// The buffer is handed to the transfer as a raw pointer.
1066 uint8_t *buf = new uint8_t[buf_size];
1068 xfr = libusb_alloc_transfer(num_iso_pack);
1070 fprintf(stderr, "oom\n");
1074 int ep = LIBUSB_ENDPOINT_IN | e;
1075 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1076 num_iso_pack, cb_xfr, nullptr, 0);
1077 libusb_set_iso_packet_lengths(xfr, size);
1078 xfr->user_data = this;
// Shrink the packet layout from the minimum-width allocation to the currently assumed width.
1081 change_xfer_size_for_width(assumed_frame_width, xfr);
1084 iso_xfrs.push_back(xfr);
// Hands every transfer stored in iso_xfrs to libusb via
// libusb_submit_transfer(). The diagnostic written to stderr names the
// transfer's endpoint, the counter i, and libusb's symbolic name for rc.
// NOTE(review): the declaration/increment of i and the condition guarding the
// error report are not visible in this listing; presumably the message is
// only printed when libusb_submit_transfer() reports a failure -- confirm.
1089 void BMUSBCapture::start_bm_capture()
1092 for (libusb_transfer *xfr : iso_xfrs) {
1093 int rc = libusb_submit_transfer(xfr);
1096 //printf("num_bytes=%d\n", num_bytes);
1097 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1098 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): interface release and libusb shutdown. Whether these two
// calls run on every invocation or sit on a failure/disabled path cannot be
// determined from the visible lines -- confirm before relying on them.
1105 libusb_release_interface(devh, 0);
1109 libusb_exit(nullptr);
// Shuts down the dequeue thread: sets dequeue_thread_should_quit, notifies
// all waiters on queues_not_empty, and then joins dequeue_thread, so the
// call does not return until that thread has finished.
// The flag is written before the notification; presumably the dequeue thread
// re-checks it after being woken -- confirm against the thread function.
1114 void BMUSBCapture::stop_dequeue_thread()
1116 dequeue_thread_should_quit = true;
1117 queues_not_empty.notify_all();
1118 dequeue_thread.join();
// Resets the should_quit flag to false and starts usb_thread, which runs
// BMUSBCapture::usb_thread_func.
// NOTE(review): no object is passed to the thread constructor, so
// usb_thread_func is presumably a static member function -- confirm.
1121 void BMUSBCapture::start_bm_thread()
1123 should_quit = false;
1124 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1127 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table that decode_video_format() scans; when a row
// matches, its fields are copied one-for-one into the output VideoFormat.
// NOTE(review): the table rows carry a ninth, boolean initializer and the
// lookup reads entry.interlaced, so the struct is expected to end with a
// `bool interlaced` member -- confirm it is declared.
1133 struct VideoFormatEntry {
// Lookup key: compared against the raw format word after the 0xe80c bits
// have been masked off.
1134 uint16_t normalized_video_format;
// Picture dimensions, plus the value copied to second_field_start (nonzero
// in the table only for rows flagged as interlaced).
1135 unsigned width, height, second_field_start;
// Copied to extra_lines_top / extra_lines_bottom; presumably the number of
// non-picture lines above and below the active picture -- confirm.
1136 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a fraction nom/den (e.g. 60000/1001 for 59.94).
1137 unsigned frame_rate_nom, frame_rate_den;
1141 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1143 decoded_video_format->id = video_format;
1144 decoded_video_format->interlaced = false;
1146 // TODO: Add these for all formats as we find them.
1147 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1149 if (video_format == 0x0800) {
1150 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1151 // It's a strange thing, but what can you do.
1152 decoded_video_format->width = 720;
1153 decoded_video_format->height = 525;
1154 decoded_video_format->extra_lines_top = 0;
1155 decoded_video_format->extra_lines_bottom = 0;
1156 decoded_video_format->frame_rate_nom = 3013;
1157 decoded_video_format->frame_rate_den = 100;
1158 decoded_video_format->has_signal = false;
1161 if ((video_format & 0xe800) != 0xe800) {
1162 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1164 decoded_video_format->width = 0;
1165 decoded_video_format->height = 0;
1166 decoded_video_format->extra_lines_top = 0;
1167 decoded_video_format->extra_lines_bottom = 0;
1168 decoded_video_format->frame_rate_nom = 60;
1169 decoded_video_format->frame_rate_den = 1;
1170 decoded_video_format->has_signal = false;
1174 decoded_video_format->has_signal = true;
1176 // NTSC (480i59.94, I suppose). A special case, see below.
1177 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1178 decoded_video_format->width = 720;
1179 decoded_video_format->height = 480;
1180 decoded_video_format->extra_lines_top = 17;
1181 decoded_video_format->extra_lines_bottom = 28;
1182 decoded_video_format->frame_rate_nom = 30000;
1183 decoded_video_format->frame_rate_den = 1001;
1184 decoded_video_format->second_field_start = 280;
1185 decoded_video_format->interlaced = true;
1189 // PAL (576i50, I suppose). A special case, see below.
1190 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1191 decoded_video_format->width = 720;
1192 decoded_video_format->height = 576;
1193 decoded_video_format->extra_lines_top = 22;
1194 decoded_video_format->extra_lines_bottom = 27;
1195 decoded_video_format->frame_rate_nom = 25;
1196 decoded_video_format->frame_rate_den = 1;
1197 decoded_video_format->second_field_start = 335;
1198 decoded_video_format->interlaced = true;
1202 // 0x8 seems to be a flag about availability of deep color on the input,
1203 // except when it's not (e.g. it's the only difference between NTSC
1204 // and PAL). Rather confusing. But we clear it here nevertheless, because
1205 // usually it doesn't mean anything.
1207 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
1208 uint16_t normalized_video_format = video_format & ~0xe80c;
1209 constexpr VideoFormatEntry entries[] = {
1210 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
1211 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
1212 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
1213 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1214 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1215 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1216 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1217 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1218 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
1219 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1220 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
1221 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
1222 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080i50.
1223 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1224 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
1226 for (const VideoFormatEntry &entry : entries) {
1227 if (normalized_video_format == entry.normalized_video_format) {
1228 decoded_video_format->width = entry.width;
1229 decoded_video_format->height = entry.height;
1230 decoded_video_format->second_field_start = entry.second_field_start;
1231 decoded_video_format->extra_lines_top = entry.extra_lines_top;
1232 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1233 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1234 decoded_video_format->frame_rate_den = entry.frame_rate_den;
1235 decoded_video_format->interlaced = entry.interlaced;
1240 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1241 decoded_video_format->width = 1280;
1242 decoded_video_format->height = 720;
1243 decoded_video_format->frame_rate_nom = 60;
1244 decoded_video_format->frame_rate_den = 1;