1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
35 using namespace std::placeholders;
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
42 #define FRAME_SIZE (8 << 20) // 8 MB.
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
50 atomic<bool> should_quit;
// Returns the isochronous packet size, in bytes, to use for a video mode
// that is `width` pixels wide.
//
// Video seems to require isochronous packets scaled with the width;
// seemingly six lines is about right, rounded up to a multiple of 1 kB.
int find_xfer_size_for_width(int width)
{
	constexpr int kBytesPerPixel = 2;
	constexpr int kLinesPerPacket = 6;
	constexpr int kGranularity = 1024;  // Must be a power of two for the mask below.

	// Note that for 10-bit input, you'll need to increase the size accordingly
	// (scale unrounded_size by 4/3 before rounding).
	const int unrounded_size = width * kBytesPerPixel * kLinesPerPacket;

	// Round up to the next multiple of kGranularity; exact multiples are kept as-is.
	return (unrounded_size + (kGranularity - 1)) & ~(kGranularity - 1);
}
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
69 assert(width >= MIN_WIDTH);
70 size_t size = find_xfer_size_for_width(width);
71 int num_iso_pack = xfr->length / size;
72 if (num_iso_pack != xfr->num_iso_packets ||
73 size != xfr->iso_packet_desc[0].length) {
74 xfr->num_iso_packets = num_iso_pack;
75 libusb_set_iso_packet_lengths(xfr, size);
81 FrameAllocator::~FrameAllocator() {}
83 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
84 : frame_size(frame_size)
86 for (size_t i = 0; i < num_queued_frames; ++i) {
87 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
91 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
96 unique_lock<mutex> lock(freelist_mutex); // Meh.
97 if (freelist.empty()) {
98 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
101 vf.data = freelist.top().release();
102 vf.size = frame_size;
103 freelist.pop(); // Meh.
108 void MallocFrameAllocator::release_frame(Frame frame)
110 if (frame.overflow > 0) {
111 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
113 unique_lock<mutex> lock(freelist_mutex);
114 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if `a` comes strictly before `b` when both are treated as
// 16-bit sequence numbers that wrap around: that is, if stepping forward from
// `a` reaches `b` in at least one and fewer than 0x8000 steps.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Distance from a forward to b, modulo 2^16.
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
129 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
131 unique_lock<mutex> lock(queue_lock);
132 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
133 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
134 q->back().timecode, timecode);
135 frame.owner->release_frame(frame);
141 qf.timecode = timecode;
143 q->push_back(move(qf));
144 queues_not_empty.notify_one(); // might be spurious
147 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
149 FILE *fp = fopen(filename, "wb");
150 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
151 printf("short write!\n");
156 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
158 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
161 void BMUSBCapture::dequeue_thread_func()
163 if (has_dequeue_callbacks) {
164 dequeue_init_callback();
166 while (!dequeue_thread_should_quit) {
167 unique_lock<mutex> lock(queue_lock);
168 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
170 if (dequeue_thread_should_quit) break;
172 uint16_t video_timecode = pending_video_frames.front().timecode;
173 uint16_t audio_timecode = pending_audio_frames.front().timecode;
174 AudioFormat audio_format;
175 audio_format.bits_per_sample = 24;
176 audio_format.num_channels = 8;
177 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
178 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
180 QueuedFrame video_frame = pending_video_frames.front();
181 pending_video_frames.pop_front();
183 video_frame_allocator->release_frame(video_frame.frame);
184 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
185 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
187 QueuedFrame audio_frame = pending_audio_frames.front();
188 pending_audio_frames.pop_front();
190 audio_format.id = audio_frame.format;
191 frame_callback(audio_timecode,
192 FrameAllocator::Frame(), 0, VideoFormat(),
193 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
195 QueuedFrame video_frame = pending_video_frames.front();
196 QueuedFrame audio_frame = pending_audio_frames.front();
197 pending_audio_frames.pop_front();
198 pending_video_frames.pop_front();
203 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
204 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
205 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
208 VideoFormat video_format;
209 audio_format.id = audio_frame.format;
210 if (decode_video_format(video_frame.format, &video_format)) {
211 frame_callback(video_timecode,
212 video_frame.frame, HEADER_SIZE, video_format,
213 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
215 frame_callback(video_timecode,
216 FrameAllocator::Frame(), 0, video_format,
217 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
221 if (has_dequeue_callbacks) {
222 dequeue_cleanup_callback();
226 void BMUSBCapture::start_new_frame(const uint8_t *start)
228 uint16_t format = (start[3] << 8) | start[2];
229 uint16_t timecode = (start[1] << 8) | start[0];
231 if (current_video_frame.len > 0) {
232 // If format is 0x0800 (no signal), add a fake (empty) audio
233 // frame to get it out of the queue.
234 // TODO: Figure out if there are other formats that come with
235 // no audio, and treat them the same.
236 if (format == 0x0800) {
237 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
238 if (fake_audio_frame.data == nullptr) {
239 // Oh well, it's just a no-signal frame anyway.
240 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
241 current_video_frame.owner->release_frame(current_video_frame);
242 current_video_frame = video_frame_allocator->alloc_frame();
245 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
248 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
250 // Update the assumed frame width. We might be one frame too late on format changes,
251 // but it's much better than asking the user to choose manually.
252 VideoFormat video_format;
253 if (decode_video_format(format, &video_format)) {
254 assumed_frame_width = video_format.width;
257 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
259 // //start[7], start[6], start[5], start[4],
260 // read_current_frame, FRAME_SIZE);
262 current_video_frame = video_frame_allocator->alloc_frame();
263 //if (current_video_frame.data == nullptr) {
264 // read_current_frame = -1;
266 // read_current_frame = 0;
270 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
272 uint16_t format = (start[3] << 8) | start[2];
273 uint16_t timecode = (start[1] << 8) | start[0];
274 if (current_audio_frame.len > 0) {
275 //dump_audio_block();
276 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
278 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
279 // format, timecode, read_current_audio_block);
280 current_audio_frame = audio_frame_allocator->alloc_frame();
284 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
286 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
287 for (unsigned j = 0; j < pack->actual_length; j++) {
288 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
289 printf("%02x", xfr->buffer[j + offset]);
292 else if ((j % 8) == 7)
// De-interleaves `n` bytes from `src`: bytes at even offsets go to `dest1`,
// bytes at odd offsets go to `dest2`, each packed contiguously.
// `n` must be even; each destination receives n / 2 bytes.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	for (size_t pair = 0; 2 * pair < n; ++pair) {
		dest1[pair] = src[2 * pair];
		dest2[pair] = src[2 * pair + 1];
	}
}
312 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
314 if (current_frame->data == nullptr ||
315 current_frame->len > current_frame->size ||
320 int bytes = end - start;
321 if (current_frame->len + bytes > current_frame->size) {
322 current_frame->overflow = current_frame->len + bytes - current_frame->size;
323 current_frame->len = current_frame->size;
324 if (current_frame->overflow > 1048576) {
325 printf("%d bytes overflow after last %s frame\n",
326 int(current_frame->overflow), frame_type_name);
327 current_frame->overflow = 0;
331 if (current_frame->interleaved) {
332 uint8_t *data = current_frame->data + current_frame->len / 2;
333 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
334 if (current_frame->len % 2 == 1) {
338 if (bytes % 2 == 1) {
341 ++current_frame->len;
344 memcpy_interleaved(data, data2, start, bytes);
345 current_frame->len += bytes;
347 memcpy(current_frame->data + current_frame->len, start, bytes);
348 current_frame->len += bytes;
356 void avx2_dump(const char *name, __m256i n)
358 printf("%-10s:", name);
359 printf(" %02x", _mm256_extract_epi8(n, 0));
360 printf(" %02x", _mm256_extract_epi8(n, 1));
361 printf(" %02x", _mm256_extract_epi8(n, 2));
362 printf(" %02x", _mm256_extract_epi8(n, 3));
363 printf(" %02x", _mm256_extract_epi8(n, 4));
364 printf(" %02x", _mm256_extract_epi8(n, 5));
365 printf(" %02x", _mm256_extract_epi8(n, 6));
366 printf(" %02x", _mm256_extract_epi8(n, 7));
368 printf(" %02x", _mm256_extract_epi8(n, 8));
369 printf(" %02x", _mm256_extract_epi8(n, 9));
370 printf(" %02x", _mm256_extract_epi8(n, 10));
371 printf(" %02x", _mm256_extract_epi8(n, 11));
372 printf(" %02x", _mm256_extract_epi8(n, 12));
373 printf(" %02x", _mm256_extract_epi8(n, 13));
374 printf(" %02x", _mm256_extract_epi8(n, 14));
375 printf(" %02x", _mm256_extract_epi8(n, 15));
377 printf(" %02x", _mm256_extract_epi8(n, 16));
378 printf(" %02x", _mm256_extract_epi8(n, 17));
379 printf(" %02x", _mm256_extract_epi8(n, 18));
380 printf(" %02x", _mm256_extract_epi8(n, 19));
381 printf(" %02x", _mm256_extract_epi8(n, 20));
382 printf(" %02x", _mm256_extract_epi8(n, 21));
383 printf(" %02x", _mm256_extract_epi8(n, 22));
384 printf(" %02x", _mm256_extract_epi8(n, 23));
386 printf(" %02x", _mm256_extract_epi8(n, 24));
387 printf(" %02x", _mm256_extract_epi8(n, 25));
388 printf(" %02x", _mm256_extract_epi8(n, 26));
389 printf(" %02x", _mm256_extract_epi8(n, 27));
390 printf(" %02x", _mm256_extract_epi8(n, 28));
391 printf(" %02x", _mm256_extract_epi8(n, 29));
392 printf(" %02x", _mm256_extract_epi8(n, 30));
393 printf(" %02x", _mm256_extract_epi8(n, 31));
398 // Does a memcpy and memchr in one to reduce processing time.
399 // Note that the benefit is somewhat limited if your L3 cache is small,
400 // as you'll (unfortunately) spend most of the time loading the data
403 // Complicated cases are left to the slow path; it basically copies only
404 // up until the first instance of "sync_char" (usually stopping a bit before, actually).
405 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
406 // data, and what we really need this for is the 00 00 ff ff marker in video data.
407 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
409 if (current_frame->data == nullptr ||
410 current_frame->len > current_frame->size ||
414 size_t orig_bytes = limit - start;
415 if (orig_bytes < 128) {
420 // Don't read more bytes than we can write.
421 limit = min(limit, start + (current_frame->size - current_frame->len));
423 // Align end to 32 bytes.
424 limit = (const uint8_t *)(intptr_t(limit) & ~31);
426 if (start >= limit) {
430 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
431 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
432 if (aligned_start != start) {
433 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
434 if (sync_start == nullptr) {
435 add_to_frame(current_frame, "", start, aligned_start);
437 add_to_frame(current_frame, "", start, sync_start);
442 // Make the length a multiple of 64.
443 if (current_frame->interleaved) {
444 if (((limit - aligned_start) % 64) != 0) {
447 assert(((limit - aligned_start) % 64) == 0);
451 const __m256i needle = _mm256_set1_epi8(sync_char);
453 const __restrict __m256i *in = (const __m256i *)aligned_start;
454 if (current_frame->interleaved) {
455 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
456 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
457 if (current_frame->len % 2 == 1) {
461 __m256i shuffle_cw = _mm256_set_epi8(
462 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
463 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
464 while (in < (const __m256i *)limit) {
465 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
466 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
467 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
469 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
470 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
471 __m256i found = _mm256_or_si256(found1, found2);
473 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
474 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
476 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
477 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
479 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
480 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
482 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
483 _mm256_storeu_si256(out2, hi);
485 if (!_mm256_testz_si256(found, found)) {
493 current_frame->len += (uint8_t *)in - aligned_start;
495 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
496 while (in < (const __m256i *)limit) {
497 __m256i data = _mm256_load_si256(in);
498 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
499 __m256i found = _mm256_cmpeq_epi8(data, needle);
500 if (!_mm256_testz_si256(found, found)) {
507 current_frame->len = (uint8_t *)out - current_frame->data;
510 const __m128i needle = _mm_set1_epi8(sync_char);
512 const __m128i *in = (const __m128i *)aligned_start;
513 if (current_frame->interleaved) {
514 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
515 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
516 if (current_frame->len % 2 == 1) {
520 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
521 while (in < (const __m128i *)limit) {
522 __m128i data1 = _mm_load_si128(in);
523 __m128i data2 = _mm_load_si128(in + 1);
524 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
525 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
526 __m128i data1_hi = _mm_srli_epi16(data1, 8);
527 __m128i data2_hi = _mm_srli_epi16(data2, 8);
528 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
529 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
530 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
531 _mm_storeu_si128(out2, hi);
532 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
533 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
534 if (!_mm_testz_si128(found1, found1) ||
535 !_mm_testz_si128(found2, found2)) {
543 current_frame->len += (uint8_t *)in - aligned_start;
545 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
546 while (in < (const __m128i *)limit) {
547 __m128i data = _mm_load_si128(in);
548 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
549 __m128i found = _mm_cmpeq_epi8(data, needle);
550 if (!_mm_testz_si128(found, found)) {
557 current_frame->len = (uint8_t *)out - current_frame->data;
561 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
563 return (const uint8_t *)in;
567 void decode_packs(const libusb_transfer *xfr,
568 const char *sync_pattern,
570 FrameAllocator::Frame *current_frame,
571 const char *frame_type_name,
572 function<void(const uint8_t *start)> start_callback)
575 for (int i = 0; i < xfr->num_iso_packets; i++) {
576 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
578 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
579 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
584 const uint8_t *start = xfr->buffer + offset;
585 const uint8_t *limit = start + pack->actual_length;
586 while (start < limit) { // Usually runs only one iteration.
588 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
589 if (start == limit) break;
590 assert(start < limit);
593 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
594 if (start_next_frame == nullptr) {
595 // add the rest of the buffer
596 add_to_frame(current_frame, frame_type_name, start, limit);
599 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
600 start = start_next_frame + sync_length; // skip sync
601 start_callback(start);
605 dump_pack(xfr, offset, pack);
607 offset += pack->length;
611 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
613 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
614 fprintf(stderr, "transfer status %d\n", xfr->status);
615 libusb_free_transfer(xfr);
619 assert(xfr->user_data != nullptr);
620 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
622 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
623 if (xfr->endpoint == 0x84) {
624 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
626 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
628 // Update the transfer with the new assumed width, if we're in the process of changing formats.
629 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
632 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
633 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
634 uint8_t *buf = libusb_control_transfer_get_data(xfr);
636 if (setup->wIndex == 44) {
637 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
639 printf("read register %2d: 0x%02x%02x%02x%02x\n",
640 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
643 memcpy(usb->register_file + usb->current_register, buf, 4);
644 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
645 if (usb->current_register == 0) {
646 // read through all of them
647 printf("register dump:");
648 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
649 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
653 libusb_fill_control_setup(xfr->buffer,
654 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
655 /*index=*/usb->current_register, /*length=*/4);
660 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
661 for (i = 0; i < xfr->actual_length; i++) {
662 printf("%02x", xfr->buffer[i]);
672 int rc = libusb_submit_transfer(xfr);
674 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
679 void BMUSBCapture::usb_thread_func()
682 memset(¶m, 0, sizeof(param));
683 param.sched_priority = 1;
684 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
685 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
687 while (!should_quit) {
688 int rc = libusb_handle_events(nullptr);
689 if (rc != LIBUSB_SUCCESS)
694 struct USBCardDevice {
697 libusb_device *device;
700 libusb_device_handle *open_card(int card_index, string *description)
702 libusb_device **devices;
703 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
704 if (num_devices == -1) {
705 fprintf(stderr, "Error finding USB devices\n");
708 vector<USBCardDevice> found_cards;
709 for (ssize_t i = 0; i < num_devices; ++i) {
710 libusb_device_descriptor desc;
711 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
712 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
716 uint8_t bus = libusb_get_bus_number(devices[i]);
717 uint8_t port = libusb_get_port_number(devices[i]);
719 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
720 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
721 libusb_unref_device(devices[i]);
725 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
727 libusb_free_device_list(devices, 0);
729 // Sort the devices to get a consistent ordering.
730 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
731 if (a.product != b.product)
732 return a.product < b.product;
734 return a.bus < b.bus;
735 return a.port < b.port;
738 for (size_t i = 0; i < found_cards.size(); ++i) {
739 const char *product_name = nullptr;
740 if (found_cards[i].product == 0xbd3b) {
741 product_name = "Intensity Shuttle";
742 } else if (found_cards[i].product == 0xbd4f) {
743 product_name = "UltraStudio SDI";
749 snprintf(buf, sizeof(buf), "Card %d: Bus %03u Device %03u %s",
750 int(i), found_cards[i].bus, found_cards[i].port, product_name);
751 if (i == size_t(card_index)) {
754 fprintf(stderr, "%s\n", buf);
757 if (size_t(card_index) >= found_cards.size()) {
758 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
762 libusb_device_handle *devh;
763 int rc = libusb_open(found_cards[card_index].device, &devh);
765 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
769 for (size_t i = 0; i < found_cards.size(); ++i) {
770 libusb_unref_device(found_cards[i].device);
776 void BMUSBCapture::configure_card()
778 if (video_frame_allocator == nullptr) {
779 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
781 if (audio_frame_allocator == nullptr) {
782 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
784 dequeue_thread_should_quit = false;
785 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
788 struct libusb_transfer *xfr;
790 rc = libusb_init(nullptr);
792 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
796 libusb_device_handle *devh = open_card(card_index, &description);
798 fprintf(stderr, "Error finding USB device\n");
802 libusb_config_descriptor *config;
803 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
805 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
810 printf("%d interface\n", config->bNumInterfaces);
811 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
812 printf(" interface %d\n", interface_number);
813 const libusb_interface *interface = &config->interface[interface_number];
814 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
815 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
816 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
817 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
818 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
819 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
825 rc = libusb_set_configuration(devh, /*configuration=*/1);
827 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
831 rc = libusb_claim_interface(devh, 0);
833 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
837 // Alternate setting 1 is output, alternate setting 2 is input.
838 // Card is reset when switching alternates, so the driver uses
839 // this “double switch” when it wants to reset.
841 // There's also alternate settings 3 and 4, which seem to be
842 // like 1 and 2 except they advertise less bandwidth needed.
843 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
845 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
848 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
850 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
854 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
856 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
862 rc = libusb_claim_interface(devh, 3);
864 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
870 // 44 is some kind of timer register (first 16 bits count upwards)
871 // 24 is some sort of watchdog?
872 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
873 // (or will go to 0x73c60010?), also seen 0x73c60100
874 // 12 also changes all the time, unclear why
875 // 16 seems to be autodetected mode somehow
876 // -- this is e00115e0 after reset?
877 // ed0115e0 after mode change [to output?]
878 // 2d0015e0 after more mode change [to input]
879 // ed0115e0 after more mode change
880 // 2d0015e0 after more mode change
882 // 390115e0 seems to indicate we have signal
883 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
885 // 200015e0 on startup
886 // changes to 250115e0 when we sync to the signal
888 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
890 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
892 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
893 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
895 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
896 // perhaps some of them are related to analog output?
898 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
899 // but the driver sets it to 0x8036802a at some point.
901 // all of this is on request 214/215. other requests (192, 219,
902 // 222, 223, 224) are used for firmware upgrade. Probably best to
903 // stay out of it unless you know what you're doing.
907 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
910 // 0x01 - stable signal
912 // 0x08 - unknown (audio??)
922 static const ctrl ctrls[] = {
923 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
924 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
926 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
928 // clearing the 0x08000000 bit seems to change the capture format (other source?)
929 // 0x10000000 = analog audio instead of embedded audio, it seems
930 // 0x3a000000 = component video? (analog audio)
931 // 0x3c000000 = composite video? (analog audio)
932 // 0x3e000000 = s-video? (analog audio)
933 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
934 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
935 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
936 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
937 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
940 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
941 uint32_t flipped = htonl(ctrls[req].data);
942 static uint8_t value[4];
943 memcpy(value, &flipped, sizeof(flipped));
944 int size = sizeof(value);
945 //if (ctrls[req].request == 215) size = 0;
946 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
947 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
949 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
953 if (ctrls[req].index == 16 && rc == 4) {
954 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
958 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
959 for (int i = 0; i < rc; ++i) {
960 printf("%02x", value[i]);
969 static int my_index = 0;
970 static uint8_t value[4];
971 int size = sizeof(value);
972 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
973 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
975 fprintf(stderr, "Error on control\n");
978 printf("rc=%d index=%d: 0x", rc, my_index);
979 for (int i = 0; i < rc; ++i) {
980 printf("%02x", value[i]);
987 // set up an asynchronous transfer of the timer register
988 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
989 static int completed = 0;
991 xfr = libusb_alloc_transfer(0);
992 libusb_fill_control_setup(cmdbuf,
993 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
994 /*index=*/44, /*length=*/4);
995 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
996 xfr->user_data = this;
997 libusb_submit_transfer(xfr);
999 // set up an asynchronous transfer of register 24
1000 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1001 static int completed2 = 0;
1003 xfr = libusb_alloc_transfer(0);
1004 libusb_fill_control_setup(cmdbuf2,
1005 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1006 /*index=*/24, /*length=*/4);
1007 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1008 xfr->user_data = this;
1009 libusb_submit_transfer(xfr);
1012 // set up an asynchronous transfer of the register dump
1013 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1014 static int completed3 = 0;
1016 xfr = libusb_alloc_transfer(0);
1017 libusb_fill_control_setup(cmdbuf3,
1018 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1019 /*index=*/current_register, /*length=*/4);
1020 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1021 xfr->user_data = this;
1022 //libusb_submit_transfer(xfr);
1024 audiofp = fopen("audio.raw", "wb");
1026 // set up isochronous transfers for audio and video
1027 for (int e = 3; e <= 4; ++e) {
1028 //int num_transfers = (e == 3) ? 6 : 6;
1029 int num_transfers = 10;
1030 for (int i = 0; i < num_transfers; ++i) {
1032 int num_iso_pack, size;
1034 // Allocate for minimum width (because that will give us the most
1035 // number of packets, so we don't need to reallocate), but we'll
1036 // default to 720p for the first frame.
1037 size = find_xfer_size_for_width(MIN_WIDTH);
1038 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1039 buf_size = USB_VIDEO_TRANSFER_SIZE;
1043 buf_size = num_iso_pack * size;
1045 assert(size_t(num_iso_pack * size) <= buf_size);
1046 uint8_t *buf = new uint8_t[buf_size];
1048 xfr = libusb_alloc_transfer(num_iso_pack);
1050 fprintf(stderr, "oom\n");
1054 int ep = LIBUSB_ENDPOINT_IN | e;
1055 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1056 num_iso_pack, cb_xfr, nullptr, 0);
1057 libusb_set_iso_packet_lengths(xfr, size);
1058 xfr->user_data = this;
1061 change_xfer_size_for_width(assumed_frame_width, xfr);
1064 iso_xfrs.push_back(xfr);
// Submits every transfer stored in iso_xfrs to libusb via
// libusb_submit_transfer().
//
// A submission problem is reported on stderr together with the endpoint
// address, a transfer number and libusb's symbolic error name.
// NOTE(review): the check on rc and the declaration/update of `i` are not
// visible in this excerpt -- confirm against the full file.
1069 void BMUSBCapture::start_bm_capture()
1072 for (libusb_transfer *xfr : iso_xfrs) {
1073 int rc = libusb_submit_transfer(xfr);
1076 //printf("num_bytes=%d\n", num_bytes);
1077 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1078 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): the control flow surrounding the two teardown calls below
// (releasing interface 0 and shutting down the default libusb context) is
// not visible in this excerpt -- confirm when, or whether, they execute.
1085 libusb_release_interface(devh, 0);
1089 libusb_exit(nullptr);
// Asks the dequeue thread to stop and waits for it to finish.
//
// Order matters: the quit flag is raised first, then every waiter on
// queues_not_empty is woken so it can observe the flag, and only then do we
// join the thread.
1094 void BMUSBCapture::stop_dequeue_thread()
1096 dequeue_thread_should_quit = true;
1097 queues_not_empty.notify_all();
// Blocks the caller until dequeue_thread has exited.
1098 dequeue_thread.join();
// Clears the should_quit flag and launches usb_thread running
// BMUSBCapture::usb_thread_func.
// NOTE(review): no object is bound to the member function pointer, so
// usb_thread_func is presumably a static member -- confirm in the header.
1101 void BMUSBCapture::start_bm_thread()
1103 should_quit = false;
1104 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1107 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table in decode_video_format(): a key plus the values
// copied into the decoded VideoFormat when the key matches.
// NOTE(review): decode_video_format() also reads an `interlaced` member from
// each entry; its declaration is not visible in this excerpt.
1113 struct VideoFormatEntry {
// Lookup key; compared against (video_format & ~0xe80c).
1114 uint16_t normalized_video_format;
// Picture dimensions, and the value stored as second_field_start
// (0 in every non-interlaced table row).
1115 unsigned width, height, second_field_start;
// Values stored as extra_lines_top / extra_lines_bottom.
1116 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a fraction nom/den (e.g. 60000/1001 for 59.94).
1117 unsigned frame_rate_nom, frame_rate_den;
// Decodes the 16-bit format word <video_format> into *decoded_video_format.
//
// The raw word is always stored in ->id. The function then handles, in order:
//   1. 0x0800 (no signal),
//   2. words that do not have all of the 0xe800 bits set,
//   3. the NTSC and PAL codes,
//   4. everything else, via a table lookup on (video_format & ~0xe80c).
// If nothing matches, it prints a warning and falls back to 1280x720 at 60/1.
//
// NOTE(review): the return statements are not visible in this excerpt, so the
// meaning of the bool result should be confirmed against the full file.
1121 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1123 decoded_video_format->id = video_format;
1124 decoded_video_format->interlaced = false;
1126 // TODO: Add these for all formats as we find them.
1127 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1129 if (video_format == 0x0800) {
1130 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1131 // It's a strange thing, but what can you do.
1132 decoded_video_format->width = 720;
1133 decoded_video_format->height = 525;
1134 decoded_video_format->extra_lines_top = 0;
1135 decoded_video_format->extra_lines_bottom = 0;
// 3013/100 = 30.13, matching the rate mentioned above.
1136 decoded_video_format->frame_rate_nom = 3013;
1137 decoded_video_format->frame_rate_den = 100;
1138 decoded_video_format->has_signal = false;
// Anything without all of the 0xe800 bits set is treated as "not a video
// format": zero dimensions, 60/1 frame rate, no signal.
1141 if ((video_format & 0xe800) != 0xe800) {
1142 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1144 decoded_video_format->width = 0;
1145 decoded_video_format->height = 0;
1146 decoded_video_format->extra_lines_top = 0;
1147 decoded_video_format->extra_lines_bottom = 0;
1148 decoded_video_format->frame_rate_nom = 60;
1149 decoded_video_format->frame_rate_den = 1;
1150 decoded_video_format->has_signal = false;
1154 decoded_video_format->has_signal = true;
1156 // NTSC (480i59.94, I suppose). A special case, see below.
1157 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1158 decoded_video_format->width = 720;
1159 decoded_video_format->height = 480;
1160 decoded_video_format->extra_lines_top = 17;
1161 decoded_video_format->extra_lines_bottom = 28;
1162 decoded_video_format->frame_rate_nom = 30000;
1163 decoded_video_format->frame_rate_den = 1001;
1164 decoded_video_format->second_field_start = 280;
1165 decoded_video_format->interlaced = true;
1169 // PAL (576i50, I suppose). A special case, see below.
1170 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1171 decoded_video_format->width = 720;
1172 decoded_video_format->height = 576;
1173 decoded_video_format->extra_lines_top = 22;
1174 decoded_video_format->extra_lines_bottom = 27;
1175 decoded_video_format->frame_rate_nom = 25;
1176 decoded_video_format->frame_rate_den = 1;
1177 decoded_video_format->second_field_start = 335;
1178 decoded_video_format->interlaced = true;
1182 // 0x8 seems to be a flag about availability of deep color on the input,
1183 // except when it's not (e.g. it's the only difference between NTSC
1184 // and PAL). Rather confusing. But we clear it here nevertheless, because
1185 // usually it doesn't mean anything.
1187 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask also clears the 0xe800 bits that were checked above.
1188 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: normalized_video_format, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, and a
// final bool that is presumably the `interlaced` member read in the loop below.
1189 constexpr VideoFormatEntry entries[] = {
1190 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
1191 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
1192 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
1193 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1194 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1195 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1196 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1197 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1198 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
1199 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1200 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
1201 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
// NOTE(review): the next row is labelled 1080p50 but has its last column set
// to true with a 25/1 frame rate, which reads like 1080i50; unlike the other
// rows ending in true it also has zero second_field_start and extra lines.
// Confirm which is intended.
1202 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50.
1203 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1204 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
// Linear scan of the table; on a match, copy the row's fields into the output.
1206 for (const VideoFormatEntry &entry : entries) {
1207 if (normalized_video_format == entry.normalized_video_format) {
1208 decoded_video_format->width = entry.width;
1209 decoded_video_format->height = entry.height;
1210 decoded_video_format->second_field_start = entry.second_field_start;
1211 decoded_video_format->extra_lines_top = entry.extra_lines_top;
1212 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1213 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1214 decoded_video_format->frame_rate_den = entry.frame_rate_den;
1215 decoded_video_format->interlaced = entry.interlaced;
// Fallback for unrecognised formats: warn and set 1280x720 at 60/1.
1220 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1221 decoded_video_format->width = 1280;
1222 decoded_video_format->height = 720;
1223 decoded_video_format->frame_rate_nom = 60;
1224 decoded_video_format->frame_rate_den = 1;