1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Number of leading bytes skipped in a video frame buffer: dump_frame() adds it
// to the frame pointer, and dequeue_thread_func() passes it to frame_callback
// as the video offset.
37 #define HEADER_SIZE 44
38 //#define HEADER_SIZE 0
// Same as HEADER_SIZE, but for audio blocks (dump_audio_block(), frame_callback).
39 #define AUDIO_HEADER_SIZE 4
// Buffer size given to the default video MallocFrameAllocator in configure_card().
41 #define FRAME_SIZE (8 << 20) // 8 MB.
// Used as the isochronous transfer buffer size (buf_size) in configure_card().
42 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Loop condition of usb_thread_func(); start_bm_thread() sets it to false
// before spawning the thread.
49 atomic<bool> should_quit;
// Computes the isochronous packet size to use for a frame of the given width.
// The starting point is six lines at two bytes per pixel (width * 2 * 6);
// a result that is not a multiple of 1024 gets adjusted afterwards.
51 int find_xfer_size_for_width(int width)
53 // Video seems to require isochronous packets scaled with the width;
54 // seemingly six lines is about right, rounded up to the required 1kB
56 int size = width * 2 * 6;
57 // Note that for 10-bit input, you'll need to increase size accordingly.
58 //size = size * 4 / 3;
59 if (size % 1024 != 0) {
// Re-slices an existing isochronous transfer for a new assumed frame width.
// The packet size comes from find_xfer_size_for_width(), and the packet count
// is however many such packets fit in xfr->length. The transfer is only
// touched when either value differs from what it currently holds.
// Asserts that width is at least MIN_WIDTH.
66 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
68 assert(width >= MIN_WIDTH);
69 size_t size = find_xfer_size_for_width(width);
70 int num_iso_pack = xfr->length / size;
// Packet 0's length stands in for all packets, since they are always set uniformly below.
71 if (num_iso_pack != xfr->num_iso_packets ||
72 size != xfr->iso_packet_desc[0].length) {
73 xfr->num_iso_packets = num_iso_pack;
74 libusb_set_iso_packet_lengths(xfr, size);
// Out-of-line, empty definition of the FrameAllocator destructor.
80 FrameAllocator::~FrameAllocator() {}
82 // Audio is more important than video, and also much cheaper.
83 // By having many more audio frames available, hopefully if something
84 // starts to drop, we'll have CPU load go down (from not having to
85 // process as much video) before we have to drop audio.
86 #define NUM_QUEUED_VIDEO_FRAMES 16
87 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator implementation that hands out buffers from a pool of
// heap-allocated byte arrays kept on a free list.
89 class MallocFrameAllocator : public FrameAllocator {
// Creates num_queued_frames buffers of frame_size bytes each.
91 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
// Takes a buffer off the free list.
92 Frame alloc_frame() override;
// Puts a frame's buffer back on the free list.
93 void release_frame(Frame frame) override;
99 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Records the per-frame buffer size and fills the free list up front with
// num_queued_frames buffers of frame_size bytes each.
102 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
103 : frame_size(frame_size)
105 for (size_t i = 0; i < num_queued_frames; ++i) {
106 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the free list, under freelist_mutex.
// An empty free list is reported as a frame overrun. Callers such as
// start_new_frame() check the returned frame's data pointer against nullptr.
// Otherwise the top buffer's ownership moves out of its unique_ptr into the
// returned frame, whose size is set to frame_size.
110 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
115 unique_lock<mutex> lock(freelist_mutex); // Meh.
116 if (freelist.empty()) {
117 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the raw pointer; the now-empty unique_ptr is popped below.
120 vf.data = freelist.top().release();
121 vf.size = frame_size;
122 freelist.pop(); // Meh.
// Returns a frame's buffer to the free list. Any overflow recorded on the
// frame is logged first; the push itself happens under freelist_mutex.
// NOTE(review): frame.data is re-wrapped unconditionally, so a frame whose
// data is nullptr would add an empty entry to the free list; confirm callers
// never release such frames.
127 void MallocFrameAllocator::release_frame(Frame frame)
129 if (frame.overflow > 0) {
130 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
132 unique_lock<mutex> lock(freelist_mutex);
133 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around at 0x10000.
// "a before b" holds when the forward distance from a to b is below 0x8000
// (half the counter range). One branch uses b - a directly; the other first
// lifts b by 0x10000 to account for b having wrapped past zero.
136 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
141 return (b - a < 0x8000);
143 int wrap_b = 0x10000 + int(b);
144 return (wrap_b - a < 0x8000);
// Appends a finished frame to queue q and wakes the dequeue thread.
// Runs under queue_lock. If the queue is non-empty and its last timecode does
// not precede the new one (wraparound-aware), the frame is logged as going
// backwards and given back to its owning allocator.
148 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
150 unique_lock<mutex> lock(queue_lock);
151 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
152 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
153 q->back().timecode, timecode);
154 frame.owner->release_frame(frame);
160 qf.timecode = timecode;
162 q->push_back(move(qf));
163 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes a frame's payload (everything after the first
// HEADER_SIZE bytes) to the given file in binary mode, reporting a short write.
// NOTE(review): the fopen() result is passed to fwrite() without a null check
// here. Also, frame_len is a size_t, so frame_len < HEADER_SIZE would wrap
// around to a huge length; confirm callers never pass such frames.
166 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
168 FILE *fp = fopen(filename, "wb");
169 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
170 printf("short write!\n");
// Debug helper: appends an audio block's payload (everything after the first
// AUDIO_HEADER_SIZE bytes) to audiofp, which configure_card() opens as
// "audio.raw". The fwrite() result is not checked.
175 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
177 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs queued video and audio frames by timecode
// and hands them to frame_callback. The optional init/cleanup callbacks run at
// thread start and end when has_dequeue_callbacks is set.
180 void BMUSBCapture::dequeue_thread_func()
182 if (has_dequeue_callbacks) {
183 dequeue_init_callback();
185 while (!dequeue_thread_should_quit) {
186 unique_lock<mutex> lock(queue_lock);
// Sleep until asked to quit, or until BOTH queues have at least one frame.
187 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
189 if (dequeue_thread_should_quit) break;
191 uint16_t video_timecode = pending_video_frames.front().timecode;
192 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Case 1: the video frame is older than the oldest audio block. Its audio
// never arrived, so the video frame goes back to its allocator.
193 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
194 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
196 QueuedFrame video_frame = pending_video_frames.front();
197 pending_video_frames.pop_front();
199 video_frame_allocator->release_frame(video_frame.frame);
// Case 2: the audio block is older than the oldest video frame. The audio is
// delivered with a default-constructed (blank) video frame and format 0x0000.
200 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
201 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
203 QueuedFrame audio_frame = pending_audio_frames.front();
204 pending_audio_frames.pop_front();
206 frame_callback(audio_timecode,
207 FrameAllocator::Frame(), 0, 0x0000,
208 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Case 3: neither timecode precedes the other, so both are delivered together.
210 QueuedFrame video_frame = pending_video_frames.front();
211 QueuedFrame audio_frame = pending_audio_frames.front();
212 pending_audio_frames.pop_front();
213 pending_video_frames.pop_front();
// Debug dump of the pair to disk; the file name is built from format and timecode.
218 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
219 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
220 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
223 frame_callback(video_timecode,
224 video_frame.frame, HEADER_SIZE, video_frame.format,
225 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
228 if (has_dequeue_callbacks) {
229 dequeue_cleanup_callback();
// Called by decode_packs() with a pointer just past a video sync marker.
// Queues the frame accumulated so far (if any), updates the assumed frame
// width from the new format, and allocates a buffer for the next frame.
233 void BMUSBCapture::start_new_frame(const uint8_t *start)
// The first four bytes are two little-endian 16-bit fields: timecode, then format.
235 uint16_t format = (start[3] << 8) | start[2];
236 uint16_t timecode = (start[1] << 8) | start[0];
238 if (current_video_frame.len > 0) {
239 // If format is 0x0800 (no signal), add a fake (empty) audio
240 // frame to get it out of the queue.
241 // TODO: Figure out if there are other formats that come with
242 // no audio, and treat them the same.
243 if (format == 0x0800) {
244 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
245 if (fake_audio_frame.data == nullptr) {
246 // Oh well, it's just a no-signal frame anyway.
247 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
248 current_video_frame.owner->release_frame(current_video_frame);
249 current_video_frame = video_frame_allocator->alloc_frame();
252 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
255 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
257 // Update the assumed frame width. We might be one frame too late on format changes,
258 // but it's much better than asking the user to choose manually.
259 unsigned width, height, second_field_start, extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den;
261 if (decode_video_format(format, &width, &height, &second_field_start, &extra_lines_top, &extra_lines_bottom,
262 &frame_rate_nom, &frame_rate_den, &interlaced)) {
263 assumed_frame_width = width;
266 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
268 // //start[7], start[6], start[5], start[4],
269 // read_current_frame, FRAME_SIZE);
// Begin accumulating the next frame in a fresh buffer.
271 current_video_frame = video_frame_allocator->alloc_frame();
272 //if (current_video_frame.data == nullptr) {
273 // read_current_frame = -1;
275 // read_current_frame = 0;
// Called by decode_packs() with a pointer just past an audio sync marker.
// Queues the audio block accumulated so far (if non-empty) and allocates a
// buffer for the next one. The header layout matches start_new_frame():
// little-endian timecode in bytes 0-1, format in bytes 2-3.
279 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
281 uint16_t format = (start[3] << 8) | start[2];
282 uint16_t timecode = (start[1] << 8) | start[0];
283 if (current_audio_frame.len > 0) {
284 //dump_audio_block();
285 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
287 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
288 // format, timecode, read_current_audio_block);
289 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet in hex.
// The bytes are read from xfr->buffer starting at offset, for
// pack->actual_length bytes.
293 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
295 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
296 for (unsigned j = 0; j < pack->actual_length; j++) {
297 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
298 printf("%02x", xfr->buffer[j + offset]);
301 else if ((j % 8) == 7)
// Copies n bytes from src, split across two destination buffers; the loop
// consumes two source bytes per iteration.
// Presumably even-indexed bytes go to dest1 and odd-indexed bytes to dest2,
// matching the interleaved layout used by add_to_frame_fastpath() (TODO confirm).
309 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
312 uint8_t *dptr1 = dest1;
313 uint8_t *dptr2 = dest2;
315 for (size_t i = 0; i < n; i += 2) {
// Slow-path append of the byte range [start, end) to current_frame.
// frame_type_name is only used in the overflow log message.
// Frames without a buffer, or whose len already exceeds size, are screened
// out by the first check.
321 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
323 if (current_frame->data == nullptr ||
324 current_frame->len > current_frame->size ||
329 int bytes = end - start;
// Not enough room: record how far past the end this append would go, and mark
// the frame as full instead of copying.
330 if (current_frame->len + bytes > current_frame->size) {
331 current_frame->overflow = current_frame->len + bytes - current_frame->size;
332 current_frame->len = current_frame->size;
// Only overflows above 1 MiB are logged here; the counter is then reset.
333 if (current_frame->overflow > 1048576) {
334 printf("%d bytes overflow after last %s frame\n",
335 int(current_frame->overflow), frame_type_name);
336 current_frame->overflow = 0;
// Interleaved frames are split across data and data2, each holding half of len.
340 if (current_frame->interleaved) {
341 uint8_t *data = current_frame->data + current_frame->len / 2;
342 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length / odd byte count are special-cased before the bulk copy.
343 if (current_frame->len % 2 == 1) {
347 if (bytes % 2 == 1) {
350 ++current_frame->len;
353 memcpy_interleaved(data, data2, start, bytes);
354 current_frame->len += bytes;
// Non-interleaved frames: plain contiguous append.
356 memcpy(current_frame->data + current_frame->len, start, bytes);
357 current_frame->len += bytes;
// Debug helper: prints a label followed by all 32 bytes of an AVX2 register
// in hex, lowest byte first. The extract calls are written out one by one
// rather than in a loop with a variable index.
365 void avx2_dump(const char *name, __m256i n)
367 printf("%-10s:", name);
// Bytes 0-7.
368 printf(" %02x", _mm256_extract_epi8(n, 0));
369 printf(" %02x", _mm256_extract_epi8(n, 1));
370 printf(" %02x", _mm256_extract_epi8(n, 2));
371 printf(" %02x", _mm256_extract_epi8(n, 3));
372 printf(" %02x", _mm256_extract_epi8(n, 4));
373 printf(" %02x", _mm256_extract_epi8(n, 5));
374 printf(" %02x", _mm256_extract_epi8(n, 6));
375 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
377 printf(" %02x", _mm256_extract_epi8(n, 8));
378 printf(" %02x", _mm256_extract_epi8(n, 9));
379 printf(" %02x", _mm256_extract_epi8(n, 10));
380 printf(" %02x", _mm256_extract_epi8(n, 11));
381 printf(" %02x", _mm256_extract_epi8(n, 12));
382 printf(" %02x", _mm256_extract_epi8(n, 13));
383 printf(" %02x", _mm256_extract_epi8(n, 14));
384 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
386 printf(" %02x", _mm256_extract_epi8(n, 16));
387 printf(" %02x", _mm256_extract_epi8(n, 17));
388 printf(" %02x", _mm256_extract_epi8(n, 18));
389 printf(" %02x", _mm256_extract_epi8(n, 19));
390 printf(" %02x", _mm256_extract_epi8(n, 20));
391 printf(" %02x", _mm256_extract_epi8(n, 21));
392 printf(" %02x", _mm256_extract_epi8(n, 22));
393 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
395 printf(" %02x", _mm256_extract_epi8(n, 24));
396 printf(" %02x", _mm256_extract_epi8(n, 25));
397 printf(" %02x", _mm256_extract_epi8(n, 26));
398 printf(" %02x", _mm256_extract_epi8(n, 27));
399 printf(" %02x", _mm256_extract_epi8(n, 28));
400 printf(" %02x", _mm256_extract_epi8(n, 29));
401 printf(" %02x", _mm256_extract_epi8(n, 30));
402 printf(" %02x", _mm256_extract_epi8(n, 31));
407 // Does a memcpy and memchr in one to reduce processing time.
408 // Note that the benefit is somewhat limited if your L3 cache is small,
409 // as you'll (unfortunately) spend most of the time loading the data
412 // Complicated cases are left to the slow path; it basically stops copying
413 // up until the first instance of "sync_char" (usually a bit before, actually).
414 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
415 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame - frame being filled; its len is advanced by the bytes copied.
//   start, limit  - input byte range [start, limit).
//   sync_char     - byte value whose presence ends the fast path.
// The final return yields the position reached in the input; decode_packs()
// continues from there with the slow path.
// The file carries both a 256-bit (AVX2) and a 128-bit (SSE) version of the
// copy loops.
416 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
418 if (current_frame->data == nullptr ||
419 current_frame->len > current_frame->size ||
// Short inputs (under 128 bytes) are checked for separately.
423 size_t orig_bytes = limit - start;
424 if (orig_bytes < 128) {
429 // Don't read more bytes than we can write.
430 limit = min(limit, start + (current_frame->size - current_frame->len));
432 // Align end to 32 bytes.
433 limit = (const uint8_t *)(intptr_t(limit) & ~31);
435 if (start >= limit) {
439 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
440 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
441 if (aligned_start != start) {
// The unaligned head goes through the slow path, cut short at the first
// sync_char if one is found in it.
442 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
443 if (sync_start == nullptr) {
444 add_to_frame(current_frame, "", start, aligned_start);
446 add_to_frame(current_frame, "", start, sync_start);
451 // Make the length a multiple of 64.
452 if (current_frame->interleaved) {
453 if (((limit - aligned_start) % 64) != 0) {
456 assert(((limit - aligned_start) % 64) == 0);
// --- 256-bit (AVX2) version ---
460 const __m256i needle = _mm256_set1_epi8(sync_char);
462 const __restrict __m256i *in = (const __m256i *)aligned_start;
463 if (current_frame->interleaved) {
// out1 uses (len + 1) / 2 and out2 uses len / 2, so they differ by one byte
// when len is odd.
464 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
465 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
466 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: even-indexed bytes to the low half, odd-indexed to the high half.
470 __m256i shuffle_cw = _mm256_set_epi8(
471 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
472 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
473 while (in < (const __m256i *)limit) {
474 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
475 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
476 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Byte-wise comparison against sync_char over both input vectors.
478 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
479 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
480 __m256i found = _mm256_or_si256(found1, found2);
482 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
483 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
485 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
486 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1 and data2; hi combines the high halves.
488 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
489 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
491 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
492 _mm256_storeu_si256(out2, hi);
// Any sync_char match in these 64 bytes triggers this branch.
494 if (!_mm256_testz_si256(found, found)) {
502 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copy with the same sync_char check.
504 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
505 while (in < (const __m256i *)limit) {
506 __m256i data = _mm256_load_si256(in);
507 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
508 __m256i found = _mm256_cmpeq_epi8(data, needle);
509 if (!_mm256_testz_si256(found, found)) {
516 current_frame->len = (uint8_t *)out - current_frame->data;
// --- 128-bit (SSE) version of the same two loops ---
519 const __m128i needle = _mm_set1_epi8(sync_char);
521 const __m128i *in = (const __m128i *)aligned_start;
522 if (current_frame->interleaved) {
523 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
524 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
525 if (current_frame->len % 2 == 1) {
// Even bytes are isolated by masking each 16-bit word, odd bytes by shifting
// it right by 8; packus then narrows the words back to bytes.
529 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
530 while (in < (const __m128i *)limit) {
531 __m128i data1 = _mm_load_si128(in);
532 __m128i data2 = _mm_load_si128(in + 1);
533 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
534 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
535 __m128i data1_hi = _mm_srli_epi16(data1, 8);
536 __m128i data2_hi = _mm_srli_epi16(data2, 8);
537 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
538 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
539 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
540 _mm_storeu_si128(out2, hi);
541 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
542 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
543 if (!_mm_testz_si128(found1, found1) ||
544 !_mm_testz_si128(found2, found2)) {
552 current_frame->len += (uint8_t *)in - aligned_start;
554 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
555 while (in < (const __m128i *)limit) {
556 __m128i data = _mm_load_si128(in);
557 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
558 __m128i found = _mm_cmpeq_epi8(data, needle);
559 if (!_mm_testz_si128(found, found)) {
566 current_frame->len = (uint8_t *)out - current_frame->data;
570 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Hand the remaining input position back to the caller.
572 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer and appends its
// payload to current_frame. Each occurrence of sync_pattern ends the current
// frame: the bytes before it are appended, the marker is skipped, and
// start_callback is invoked with a pointer to the byte right after it.
// frame_type_name is forwarded to add_to_frame() for log messages.
576 void decode_packs(const libusb_transfer *xfr,
577 const char *sync_pattern,
579 FrameAllocator::Frame *current_frame,
580 const char *frame_type_name,
581 function<void(const uint8_t *start)> start_callback)
584 for (int i = 0; i < xfr->num_iso_packets; i++) {
585 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
587 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
588 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of the packet's slot were received.
593 const uint8_t *start = xfr->buffer + offset;
594 const uint8_t *limit = start + pack->actual_length;
595 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path is keyed on the first byte of
// the sync pattern.
597 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
598 if (start == limit) break;
599 assert(start < limit);
// Slow path: search the remainder for the full sync pattern.
602 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
603 if (start_next_frame == nullptr) {
604 // add the rest of the buffer
605 add_to_frame(current_frame, frame_type_name, start, limit);
608 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
609 start = start_next_frame + sync_length; // skip sync
610 start_callback(start);
614 dump_pack(xfr, offset, pack);
// Advance by the slot size (length), not by the bytes received (actual_length).
616 offset += pack->length;
// libusb completion callback shared by all transfers of this driver.
// Isochronous transfers are decoded into audio or video frames; control
// transfers feed the register file. The transfer is then submitted again.
// The owning BMUSBCapture is carried in xfr->user_data.
620 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is logged and freed.
622 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
623 fprintf(stderr, "transfer status %d\n", xfr->status);
624 libusb_free_transfer(xfr);
628 assert(xfr->user_data != nullptr);
629 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
631 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio (20-byte text marker); the other branch decodes
// video, whose frames are delimited by 00 00 ff ff.
632 if (xfr->endpoint == 0x84) {
633 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
635 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
637 // Update the transfer with the new assumed width, if we're in the process of changing formats.
638 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
641 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
642 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
643 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is used below while its declaration above is commented
// out; confirm these printfs are compiled out.
645 if (setup->wIndex == 44) {
646 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
648 printf("read register %2d: 0x%02x%02x%02x%02x\n",
649 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the 4-byte register value and step to the next register, wrapping at
// NUM_BMUSB_REGISTERS.
652 memcpy(usb->register_file + usb->current_register, buf, 4);
653 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
654 if (usb->current_register == 0) {
655 // read through all of them
656 printf("register dump:");
657 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
658 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the same buffer to read the next register (vendor request 214).
662 libusb_fill_control_setup(xfr->buffer,
663 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
664 /*index=*/usb->current_register, /*length=*/4);
669 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
670 for (i = 0; i < xfr->actual_length; i++) {
671 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so that data keeps flowing.
681 int rc = libusb_submit_transfer(xfr);
683 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Body of the USB thread. It first requests SCHED_RR scheduling at priority 1
// for the calling thread and reports (but tolerates) failure. It then pumps
// libusb events until should_quit is set; the result of each
// libusb_handle_events() call is compared against LIBUSB_SUCCESS.
688 void BMUSBCapture::usb_thread_func()
// The address-of operator on `param` was mis-encoded as a pilcrow ('¶m') on
// the memset and sched_setscheduler lines; both are restored to `&param`.
691 memset(&param, 0, sizeof(param));
692 param.sched_priority = 1;
693 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
694 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
// completion callbacks (cb_xfr) are dispatched from inside libusb_handle_events().
696 while (!should_quit) {
697 int rc = libusb_handle_events(nullptr);
698 if (rc != LIBUSB_SUCCESS)
// One matching USB device found by open_card(): the product ID, bus number
// and port number used for sorting, plus the libusb device itself.
703 struct USBCardDevice {
706 libusb_device *device;
// Enumerates USB devices, keeps those with vendor 0x1edb and product 0xbd3b
// or 0xbd4f, sorts them into a stable order and opens the one at card_index.
// The list of found cards is printed to stderr along the way.
709 libusb_device_handle *open_card(int card_index)
711 libusb_device **devices;
712 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
713 if (num_devices == -1) {
714 fprintf(stderr, "Error finding USB devices\n");
717 vector<USBCardDevice> found_cards;
718 for (ssize_t i = 0; i < num_devices; ++i) {
719 libusb_device_descriptor desc;
720 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
721 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
725 uint8_t bus = libusb_get_bus_number(devices[i]);
726 uint8_t port = libusb_get_port_number(devices[i]);
// Not one of the two supported products: drop our reference to it.
728 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
729 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
730 libusb_unref_device(devices[i]);
734 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Free only the list array (unref_devices = 0); references are dropped
// individually above and at the end of this function.
736 libusb_free_device_list(devices, 0);
738 // Sort the devices to get a consistent ordering.
739 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
740 if (a.product != b.product)
741 return a.product < b.product;
743 return a.bus < b.bus;
744 return a.port < b.port;
747 for (size_t i = 0; i < found_cards.size(); ++i) {
// NOTE(review): the value printed under the "Device" label is the port number.
748 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
749 if (found_cards[i].product == 0xbd3b) {
750 fprintf(stderr, "Intensity Shuttle\n");
751 } else if (found_cards[i].product == 0xbd4f) {
752 fprintf(stderr, "UltraStudio SDI\n");
// The size_t cast makes a negative card_index fail this range check as well.
758 if (size_t(card_index) >= found_cards.size()) {
759 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
763 libusb_device_handle *devh;
764 int rc = libusb_open(found_cards[card_index].device, &devh);
766 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the enumeration references for every found card.
770 for (size_t i = 0; i < found_cards.size(); ++i) {
771 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card. In order: installs default frame
// allocators if none were set, starts the dequeue thread, initializes libusb,
// opens the card, selects configuration and interface alternate settings,
// runs an initial sequence of control requests, and prepares (but does not
// submit) the isochronous transfers, which are collected in iso_xfrs.
777 void BMUSBCapture::configure_card()
779 if (video_frame_allocator == nullptr) {
780 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
782 if (audio_frame_allocator == nullptr) {
783 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
785 dequeue_thread_should_quit = false;
786 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
789 struct libusb_transfer *xfr;
791 rc = libusb_init(nullptr);
793 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
797 libusb_device_handle *devh = open_card(card_index);
799 fprintf(stderr, "Error finding USB device\n");
803 libusb_config_descriptor *config;
804 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
806 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of every interface, alternate setting and endpoint.
811 printf("%d interface\n", config->bNumInterfaces);
812 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
813 printf(" interface %d\n", interface_number);
814 const libusb_interface *interface = &config->interface[interface_number];
815 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
816 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
817 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
818 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
819 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
820 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
826 rc = libusb_set_configuration(devh, /*configuration=*/1);
828 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
832 rc = libusb_claim_interface(devh, 0);
834 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
838 // Alternate setting 1 is output, alternate setting 2 is input.
839 // Card is reset when switching alternates, so the driver uses
840 // this “double switch” when it wants to reset.
842 // There's also alternate settings 3 and 4, which seem to be
843 // like 1 and 2 except they advertise less bandwidth needed.
844 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
846 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
849 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
851 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
855 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
857 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
863 rc = libusb_claim_interface(devh, 3);
865 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's register file follow.
871 // 44 is some kind of timer register (first 16 bits count upwards)
872 // 24 is some sort of watchdog?
873 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
874 // (or will go to 0x73c60010?), also seen 0x73c60100
875 // 12 also changes all the time, unclear why
876 // 16 seems to be autodetected mode somehow
877 // -- this is e00115e0 after reset?
878 // ed0115e0 after mode change [to output?]
879 // 2d0015e0 after more mode change [to input]
880 // ed0115e0 after more mode change
881 // 2d0015e0 after more mode change
883 // 390115e0 seems to indicate we have signal
884 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
886 // 200015e0 on startup
887 // changes to 250115e0 when we sync to the signal
889 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
891 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
893 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
894 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
896 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
897 // perhaps some of them are related to analog output?
899 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
900 // but the driver sets it to 0x8036802a at some point.
902 // all of this is on request 214/215. other requests (192, 219,
903 // 222, 223, 224) are used for firmware upgrade. Probably best to
904 // stay out of it unless you know what you're doing.
908 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
911 // 0x01 - stable signal
913 // 0x08 - unknown (audio??)
// Initial control sequence; each entry is {direction, request, register index, data}.
923 static const ctrl ctrls[] = {
924 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
925 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
927 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
929 // clearing the 0x08000000 bit seems to change the capture format (other source?)
930 // 0x10000000 = analog audio instead of embedded audio, it seems
931 // 0x3a000000 = component video? (analog audio)
932 // 0x3c000000 = composite video? (analog audio)
933 // 0x3e000000 = s-video? (analog audio)
934 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
935 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
936 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
937 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
938 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
941 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The 32-bit value is sent in network (big-endian) byte order.
942 uint32_t flipped = htonl(ctrls[req].data);
943 static uint8_t value[4];
944 memcpy(value, &flipped, sizeof(flipped));
945 int size = sizeof(value);
946 //if (ctrls[req].request == 215) size = 0;
947 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
948 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
950 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte reply from register 16 is printed as the firmware version.
954 if (ctrls[req].index == 16 && rc == 4) {
955 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
959 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
960 for (int i = 0; i < rc; ++i) {
961 printf("%02x", value[i]);
// Synchronous read of a single register, selected by my_index.
970 static int my_index = 0;
971 static uint8_t value[4];
972 int size = sizeof(value);
973 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
974 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
976 fprintf(stderr, "Error on control\n");
979 printf("rc=%d index=%d: 0x", rc, my_index);
980 for (int i = 0; i < rc; ++i) {
981 printf("%02x", value[i]);
988 // set up an asynchronous transfer of the timer register
989 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
990 static int completed = 0;
992 xfr = libusb_alloc_transfer(0);
993 libusb_fill_control_setup(cmdbuf,
994 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
995 /*index=*/44, /*length=*/4);
996 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Overrides the user_data passed just above; cb_xfr() expects the BMUSBCapture there.
997 xfr->user_data = this;
998 libusb_submit_transfer(xfr);
1000 // set up an asynchronous transfer of register 24
1001 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1002 static int completed2 = 0;
1004 xfr = libusb_alloc_transfer(0);
1005 libusb_fill_control_setup(cmdbuf2,
1006 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1007 /*index=*/24, /*length=*/4);
1008 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1009 xfr->user_data = this;
1010 libusb_submit_transfer(xfr);
1013 // set up an asynchronous transfer of the register dump
1014 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1015 static int completed3 = 0;
1017 xfr = libusb_alloc_transfer(0);
1018 libusb_fill_control_setup(cmdbuf3,
1019 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1020 /*index=*/current_register, /*length=*/4);
1021 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1022 xfr->user_data = this;
1023 //libusb_submit_transfer(xfr);
// Output file used by dump_audio_block().
1025 audiofp = fopen("audio.raw", "wb");
1027 // set up isochronous transfers for audio and video
// Endpoints 3 and 4 (IN direction); cb_xfr() treats address 0x84 as audio.
1028 for (int e = 3; e <= 4; ++e) {
1029 //int num_transfers = (e == 3) ? 6 : 6;
1030 int num_transfers = 10;
1031 for (int i = 0; i < num_transfers; ++i) {
1033 int num_iso_pack, size;
1035 // Allocate for minimum width (because that will give us the most
1036 // number of packets, so we don't need to reallocate), but we'll
1037 // default to 720p for the first frame.
1038 size = find_xfer_size_for_width(MIN_WIDTH);
1039 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1040 buf_size = USB_VIDEO_TRANSFER_SIZE;
1044 buf_size = num_iso_pack * size;
1046 assert(size_t(num_iso_pack * size) <= buf_size);
1047 uint8_t *buf = new uint8_t[buf_size];
1049 xfr = libusb_alloc_transfer(num_iso_pack);
1051 fprintf(stderr, "oom\n");
1055 int ep = LIBUSB_ENDPOINT_IN | e;
1056 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1057 num_iso_pack, cb_xfr, nullptr, 0);
1058 libusb_set_iso_packet_lengths(xfr, size);
1059 xfr->user_data = this;
// Re-slice for the currently assumed width.
1062 change_xfer_size_for_width(assumed_frame_width, xfr);
// Submission happens later, in start_bm_capture().
1065 iso_xfrs.push_back(xfr);
// Submits every transfer collected in iso_xfrs to libusb.
// NOTE(review): several lines of this function are not visible in this chunk
// (the declaration/update of `i`, the condition guarding the error message,
// and whatever guards the teardown calls at the end) — confirm against the
// full file before relying on the notes below.
1070 void BMUSBCapture::start_bm_capture()
// Hand each prepared transfer to libusb in turn; rc holds libusb's status code.
1073 for (libusb_transfer *xfr : iso_xfrs) {
1074 int rc = libusb_submit_transfer(xfr);
1077 //printf("num_bytes=%d\n", num_bytes);
// Report the endpoint address, the transfer's index and libusb's symbolic
// name for rc on stderr.
1078 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1079 xfr->endpoint, i, libusb_error_name(rc));
// Teardown calls: release interface 0 on devh, then shut down the libusb
// context selected by nullptr.
// NOTE(review): it is not visible here when (or whether) these run — verify.
1086 libusb_release_interface(devh, 0);
1090 libusb_exit(nullptr);
1095 void BMUSBCapture::stop_dequeue_thread()
1097 dequeue_thread_should_quit = true;
1098 queues_not_empty.notify_all();
1099 dequeue_thread.join();
1102 void BMUSBCapture::start_bm_thread()
1104 should_quit = false;
1105 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart to start_bm_thread().
// NOTE(review): the body of this function is not visible in this chunk;
// presumably it sets should_quit and joins usb_thread — confirm against the
// full file.
1108 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table that decode_video_format() uses to translate a
// normalized format code into frame geometry and frame rate.
// NOTE(review): the table rows and the lookup loop also use a trailing boolean
// `interlaced` member; its declaration and the struct's closing brace are not
// visible in this chunk.
1114 struct VideoFormatEntry {
// Format code to match, compared against (video_format & ~0xe808).
1115 uint16_t normalized_video_format;
// Values reported through *width, *height and *second_field_start.
1116 unsigned width, height, second_field_start;
// Values reported through *extra_lines_top and *extra_lines_bottom.
1117 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as the rational frame_rate_nom / frame_rate_den (e.g. 60000/1001).
1118 unsigned frame_rate_nom, frame_rate_den;
// Decodes a 16-bit video format code into frame geometry and frame rate,
// written through the output pointers.
//
// Cases, in the order they are checked:
//  - 0x0800: "no signal"; frame rate is set to 3013/100.
//  - (video_format & 0xe800) != 0xe800: not a video format; a message is
//    printed and 60/1 is assumed.
//  - the NTSC and PAL codes: special-cased before any flag bits are masked.
//  - everything else: masked with ~0xe808 and looked up in a table.
//  - no table match: a message is printed and 60/1 (720p60) is assumed.
//
// *interlaced defaults to false; *extra_lines_top, *extra_lines_bottom and
// *second_field_start default to 0.
// NOTE(review): the width/height assignments of the special cases and every
// return statement are not visible in this chunk, so the meaning of the bool
// result is not documented here — confirm against the full file.
1122 bool decode_video_format(uint16_t video_format, unsigned *width, unsigned *height, unsigned *second_field_start,
1123 unsigned *extra_lines_top, unsigned *extra_lines_bottom,
1124 unsigned *frame_rate_nom, unsigned *frame_rate_den, bool *interlaced)
// Defaults; only the table lookup below overrides *interlaced.
1126 *interlaced = false;
1128 // TODO: Add these for all formats as we find them.
1129 *extra_lines_top = *extra_lines_bottom = *second_field_start = 0;
1131 if (video_format == 0x0800) {
1132 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1133 // It's a strange thing, but what can you do.
1136 *extra_lines_top = 0;
1137 *extra_lines_bottom = 0;
// 3013/100 = 30.13 frames per second.
1138 *frame_rate_nom = 3013;
1139 *frame_rate_den = 100;
// Any code that does not have all of the 0xe800 bits set is rejected here.
1142 if ((video_format & 0xe800) != 0xe800) {
1143 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1147 *extra_lines_top = 0;
1148 *extra_lines_bottom = 0;
1149 *frame_rate_nom = 60;
1150 *frame_rate_den = 1;
1154 // NTSC (480i59.94, I suppose). A special case, see below.
1155 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
// 30000/1001 is roughly 29.97 frames per second.
1158 *frame_rate_nom = 30000;
1159 *frame_rate_den = 1001;
// Reads *height, so *height must already have been assigned by this point
// (that assignment is not visible in this chunk).
1160 *second_field_start = *height / 2; // TODO
1165 // PAL (576i50, I suppose). A special case, see below.
1166 if (video_format == 0xe909 || video_format == 0xe9c9) {
1169 *extra_lines_top = 22;
1170 *extra_lines_bottom = 27;
1171 *frame_rate_nom = 25;
1172 *frame_rate_den = 1;
1173 *second_field_start = 334;
1178 // 0x8 seems to be a flag about availability of deep color on the input,
1179 // except when it's not (e.g. it's the only difference between NTSC
1180 // and PAL). Rather confusing. But we clear it here nevertheless, because
1181 // usually it doesn't mean anything.
// The mask also clears the 0xe800 bits that were verified to be set above.
1182 uint16_t normalized_video_format = video_format & ~0xe808;
// Columns: normalized code, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
1183 constexpr VideoFormatEntry entries[] = {
1184 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1185 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1186 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1187 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1188 { 0x0003, 1920, 1080, 582, 20, 25, 30, 1, true }, // 1080i60.
1189 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1190 { 0x0021, 1920, 1080, 582, 20, 25, 30000, 1001, true }, // 1080i59.94.
1191 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
1192 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080i50 (row is flagged interlaced at 25 frames/s; comment previously read 1080p50).
1193 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1194 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
// Linear search for the matching code; a match copies every field of the
// row to the corresponding output.
1196 for (const VideoFormatEntry &entry : entries) {
1197 if (normalized_video_format == entry.normalized_video_format) {
1198 *width = entry.width;
1199 *height = entry.height;
1200 *second_field_start = entry.second_field_start;
1201 *extra_lines_top = entry.extra_lines_top;
1202 *extra_lines_bottom = entry.extra_lines_bottom;
1203 *frame_rate_nom = entry.frame_rate_nom;
1204 *frame_rate_den = entry.frame_rate_den;
1205 *interlaced = entry.interlaced;
// Fallback when no table row matched: log the original (unmasked) code.
1210 printf("Unknown video format 0x%04x. Assuming 720p60.\n", video_format);
1213 *frame_rate_nom = 60;
1214 *frame_rate_den = 1;