1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Number of bytes at the start of a captured video frame buffer that precede the
// picture data: dump_frame() skips this many bytes, and dequeue_thread_func()
// passes it to frame_callback() as the video offset.
37 #define HEADER_SIZE 44
38 //#define HEADER_SIZE 0
// Same idea for audio blocks: skipped by dump_audio_block() and passed to
// frame_callback() as the audio offset.
39 #define AUDIO_HEADER_SIZE 4
// Buffer size given to the default video MallocFrameAllocator in configure_card().
41 #define FRAME_SIZE (8 << 20) // 8 MB.
// Used in configure_card() as the buffer size (and to derive the packet count)
// when setting up isochronous transfers.
42 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Loop condition of usb_thread_func(); reset to false in start_bm_thread().
49 atomic<bool> should_quit;
// Computes the isochronous packet size to use for a given frame width.
// Starts from width * 2 * 6 bytes (six lines, per the comment below) and then
// special-cases sizes that are not a multiple of 1024.
// NOTE(review): the adjustment and the return statement are on lines not
// visible here; presumably the size is rounded up to the next 1 kB — confirm.
51 int find_xfer_size_for_width(int width)
53 // Video seems to require isochronous packets scaled with the width;
54 // seemingly six lines is about right, rounded up to the required 1kB
56 int size = width * 2 * 6;
57 // Note that for 10-bit input, you'll need to increase size accordingly.
58 //size = size * 4 / 3;
59 if (size % 1024 != 0) {
// Re-slices an existing transfer for a new assumed frame width.
//   width: frame width in pixels; asserted to be at least MIN_WIDTH.
//   xfr:   transfer whose packet layout is updated in place.
// The packet size comes from find_xfer_size_for_width(), and the packet count is
// however many such packets fit in xfr->length. The transfer is only touched
// when the count or the first packet's length differs from the current values.
66 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
68 assert(width >= MIN_WIDTH);
69 size_t size = find_xfer_size_for_width(width);
70 int num_iso_pack = xfr->length / size;
71 if (num_iso_pack != xfr->num_iso_packets ||
72 size != xfr->iso_packet_desc[0].length) {
73 xfr->num_iso_packets = num_iso_pack;
74 libusb_set_iso_packet_lengths(xfr, size);
// Out-of-line definition of the FrameAllocator destructor; it has an empty body.
80 FrameAllocator::~FrameAllocator() {}
82 // Audio is more important than video, and also much cheaper.
83 // By having many more audio frames available, hopefully if something
84 // starts to drop, we'll have CPU load go down (from not having to
85 // process as much video) before we have to drop audio.
// These are the num_queued_frames arguments for the default
// MallocFrameAllocator instances created in configure_card().
86 #define NUM_QUEUED_VIDEO_FRAMES 16
87 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator implementation backed by a fixed pool of heap buffers.
// All buffers are created up front by the constructor and kept on a free list;
// alloc_frame() takes one off the list and release_frame() puts it back.
// (Some member declarations of this class are on lines not visible here.)
89 class MallocFrameAllocator : public FrameAllocator {
// frame_size: size in bytes of each buffer; num_queued_frames: number of buffers in the pool.
91 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
92 Frame alloc_frame() override;
93 void release_frame(Frame frame) override;
// Buffers currently available for alloc_frame().
99 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers the per-buffer size and pre-allocates num_queued_frames buffers of
// frame_size bytes each onto the free list.
102 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
103 : frame_size(frame_size)
105 for (size_t i = 0; i < num_queued_frames; ++i) {
106 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the free list, under freelist_mutex.
// If the list is empty, a "Frame overrun" message is printed; otherwise the
// frame's data pointer takes over ownership of the top buffer, its size is set
// to frame_size, and the (now empty) entry is popped.
// NOTE(review): the printf argument is on a line not visible here; if it is the
// size_t frame_size, "%zu" would be the matching conversion rather than "%ld".
110 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
115 unique_lock<mutex> lock(freelist_mutex); // Meh.
116 if (freelist.empty()) {
117 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the raw pointer so that the pop() below does not free it.
120 vf.data = freelist.top().release();
121 vf.size = frame_size;
122 freelist.pop(); // Meh.
// Returns a frame's buffer to the free list.
// Logs the number of overflow bytes recorded on the frame (if any), then, under
// freelist_mutex, re-wraps frame.data in a unique_ptr and pushes it back.
127 void MallocFrameAllocator::release_frame(Frame frame)
129 if (frame.overflow > 0) {
130 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
132 unique_lock<mutex> lock(freelist_mutex);
133 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that may wrap around.
// Two cases are visible: one compares b - a directly against 0x8000, the other
// first adds 0x10000 to b, so that a value just past the wrap point still
// counts as being ahead of one just before it.
// NOTE(review): the conditions selecting between the cases (and any handling of
// a == b) are on lines not visible here.
136 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
141 return (b - a < 0x8000);
143 int wrap_b = 0x10000 + int(b);
144 return (wrap_b - a < 0x8000);
// Appends a finished frame to queue *q (under queue_lock) and wakes the dequeue thread.
//   format, timecode: header values for the frame.
//   frame:            buffer to queue.
//   q:                destination queue (pending video or pending audio frames).
// If the queue is non-empty and the last queued timecode is not strictly before
// this one (per uint16_less_than_with_wraparound), the frame is reported as
// going backwards and released back to its owner.
148 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
150 unique_lock<mutex> lock(queue_lock);
151 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
152 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
153 q->back().timecode, timecode);
154 frame.owner->release_frame(frame);
160 qf.timecode = timecode;
162 q->push_back(move(qf));
163 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes one frame's payload (everything after the first
// HEADER_SIZE bytes) to a new binary file, printing "short write!" on failure.
// NOTE(review): no check of the fopen() result is visible here, and
// frame_len - HEADER_SIZE wraps around if frame_len < HEADER_SIZE — confirm
// callers never pass such a frame.
166 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
168 FILE *fp = fopen(filename, "wb");
169 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
170 printf("short write!\n");
// Debug helper: appends one audio block's payload (everything after the first
// AUDIO_HEADER_SIZE bytes) to the file referred to by audiofp. The fwrite()
// result is not checked.
175 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
177 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs queued video and audio frames by timecode
// and hands them to frame_callback.
// Runs dequeue_init_callback / dequeue_cleanup_callback around the loop when
// has_dequeue_callbacks is set. Each iteration waits until a quit is requested
// or both queues hold at least one frame, then compares the front timecodes:
//   - video older than audio: the video frame is dropped and released;
//   - audio older than video: the audio frame is delivered with an empty video frame;
//   - otherwise: both are popped and delivered together.
180 void BMUSBCapture::dequeue_thread_func()
182 if (has_dequeue_callbacks) {
183 dequeue_init_callback();
185 while (!dequeue_thread_should_quit) {
186 unique_lock<mutex> lock(queue_lock);
187 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
189 if (dequeue_thread_should_quit) break;
191 uint16_t video_timecode = pending_video_frames.front().timecode;
192 uint16_t audio_timecode = pending_audio_frames.front().timecode;
193 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
194 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
196 QueuedFrame video_frame = pending_video_frames.front();
197 pending_video_frames.pop_front();
199 video_frame_allocator->release_frame(video_frame.frame);
200 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
201 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
203 QueuedFrame audio_frame = pending_audio_frames.front();
204 pending_audio_frames.pop_front();
// Default-constructed Frame with offset 0 and format 0x0000 stands in for the missing video.
206 frame_callback(audio_timecode,
207 FrameAllocator::Frame(), 0, 0x0000,
208 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
210 QueuedFrame video_frame = pending_video_frames.front();
211 QueuedFrame audio_frame = pending_audio_frames.front();
212 pending_audio_frames.pop_front();
213 pending_video_frames.pop_front();
// NOTE(review): the dump-to-disk calls below may be compiled out by
// preprocessor lines that are not visible here.
218 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
219 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
220 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
223 frame_callback(video_timecode,
224 video_frame.frame, HEADER_SIZE, video_frame.format,
225 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
228 if (has_dequeue_callbacks) {
229 dequeue_cleanup_callback();
// Called when a video sync marker has been found; `start` points at the bytes
// that follow the marker.
// Reads the 16-bit little-endian timecode (bytes 0-1) and format (bytes 2-3).
// If the frame collected so far is non-empty it is queued under those values,
// and for format 0x0800 a fake audio frame is queued as well. The assumed frame
// width is updated when the format decodes successfully, and a fresh video
// frame is allocated for the data that follows.
233 void BMUSBCapture::start_new_frame(const uint8_t *start)
235 uint16_t format = (start[3] << 8) | start[2];
236 uint16_t timecode = (start[1] << 8) | start[0];
238 if (current_video_frame.len > 0) {
239 // If format is 0x0800 (no signal), add a fake (empty) audio
240 // frame to get it out of the queue.
241 // TODO: Figure out if there are other formats that come with
242 // no audio, and treat them the same.
243 if (format == 0x0800) {
244 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
245 if (fake_audio_frame.data == nullptr) {
246 // Oh well, it's just a no-signal frame anyway.
247 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
248 current_video_frame.owner->release_frame(current_video_frame);
249 current_video_frame = video_frame_allocator->alloc_frame();
252 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
255 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
257 // Update the assumed frame width. We might be one frame too late on format changes,
258 // but it's much better than asking the user to choose manually.
259 int width, height, frame_rate_nom, frame_rate_den;
261 if (decode_video_format(format, &width, &height, &frame_rate_nom, &frame_rate_den, &interlaced)) {
262 assumed_frame_width = width;
265 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
267 // //start[7], start[6], start[5], start[4],
268 // read_current_frame, FRAME_SIZE);
270 current_video_frame = video_frame_allocator->alloc_frame();
271 //if (current_video_frame.data == nullptr) {
272 // read_current_frame = -1;
274 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called when an audio sync marker has
// been found, with `start` pointing just past it.
// Reads the little-endian timecode (bytes 0-1) and format (bytes 2-3), queues
// the audio frame collected so far if it is non-empty, and allocates a new one.
278 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
280 uint16_t format = (start[3] << 8) | start[2];
281 uint16_t timecode = (start[1] << 8) | start[0];
282 if (current_audio_frame.len > 0) {
283 //dump_audio_block();
284 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
286 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
287 // format, timecode, read_current_audio_block);
288 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//   xfr:    transfer whose buffer holds the packet.
//   offset: byte offset of the packet within xfr->buffer.
//   pack:   packet descriptor; actual_length bytes are printed.
// Separators are chosen by byte position (every 8th byte is visible here; the
// other cases are on lines not shown).
292 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
294 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
295 for (unsigned j = 0; j < pack->actual_length; j++) {
296 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
297 printf("%02x", xfr->buffer[j + offset]);
300 else if ((j % 8) == 7)
// Copies n bytes from src into two destinations, consuming the source two
// bytes per iteration.
// NOTE(review): the loop body is not visible here; presumably even-indexed
// source bytes go to dest1 and odd-indexed ones to dest2 — confirm. The loop
// steps by 2, so n is presumably expected to be even.
308 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
311 uint8_t *dptr1 = dest1;
312 uint8_t *dptr2 = dest2;
314 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to *current_frame (the general, non-SIMD path).
//   current_frame:   frame being filled; len is advanced by the bytes stored.
//   frame_type_name: label used in the overflow message ("video"/"audio").
// A frame with no buffer, or whose len already exceeds size, takes the first
// branch (its body is not visible here). If the new data does not fit, the
// excess is recorded in overflow and len is clamped to size; an overflow above
// 1 MiB is logged and reset to 0. Interleaved frames are written through
// memcpy_interleaved() into data/data2, with odd lengths special-cased;
// non-interleaved frames use a plain memcpy.
320 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
322 if (current_frame->data == nullptr ||
323 current_frame->len > current_frame->size ||
328 int bytes = end - start;
329 if (current_frame->len + bytes > current_frame->size) {
330 current_frame->overflow = current_frame->len + bytes - current_frame->size;
331 current_frame->len = current_frame->size;
332 if (current_frame->overflow > 1048576) {
333 printf("%d bytes overflow after last %s frame\n",
334 int(current_frame->overflow), frame_type_name);
335 current_frame->overflow = 0;
339 if (current_frame->interleaved) {
// Each plane receives half of the bytes, hence the len / 2 offsets.
340 uint8_t *data = current_frame->data + current_frame->len / 2;
341 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
342 if (current_frame->len % 2 == 1) {
346 if (bytes % 2 == 1) {
349 ++current_frame->len;
352 memcpy_interleaved(data, data2, start, bytes);
353 current_frame->len += bytes;
355 memcpy(current_frame->data + current_frame->len, start, bytes);
356 current_frame->len += bytes;
// Debug helper: prints `name` left-justified in a 10-character field followed
// by ':' and then the 32 bytes of the 256-bit vector `n` as two-digit hex,
// lowest byte index first. The calls are written out one by one because each
// one passes a literal index to _mm256_extract_epi8.
364 void avx2_dump(const char *name, __m256i n)
366 printf("%-10s:", name);
367 printf(" %02x", _mm256_extract_epi8(n, 0));
368 printf(" %02x", _mm256_extract_epi8(n, 1));
369 printf(" %02x", _mm256_extract_epi8(n, 2));
370 printf(" %02x", _mm256_extract_epi8(n, 3));
371 printf(" %02x", _mm256_extract_epi8(n, 4));
372 printf(" %02x", _mm256_extract_epi8(n, 5));
373 printf(" %02x", _mm256_extract_epi8(n, 6));
374 printf(" %02x", _mm256_extract_epi8(n, 7));
376 printf(" %02x", _mm256_extract_epi8(n, 8));
377 printf(" %02x", _mm256_extract_epi8(n, 9));
378 printf(" %02x", _mm256_extract_epi8(n, 10));
379 printf(" %02x", _mm256_extract_epi8(n, 11));
380 printf(" %02x", _mm256_extract_epi8(n, 12));
381 printf(" %02x", _mm256_extract_epi8(n, 13));
382 printf(" %02x", _mm256_extract_epi8(n, 14));
383 printf(" %02x", _mm256_extract_epi8(n, 15));
385 printf(" %02x", _mm256_extract_epi8(n, 16));
386 printf(" %02x", _mm256_extract_epi8(n, 17));
387 printf(" %02x", _mm256_extract_epi8(n, 18));
388 printf(" %02x", _mm256_extract_epi8(n, 19));
389 printf(" %02x", _mm256_extract_epi8(n, 20));
390 printf(" %02x", _mm256_extract_epi8(n, 21));
391 printf(" %02x", _mm256_extract_epi8(n, 22));
392 printf(" %02x", _mm256_extract_epi8(n, 23));
394 printf(" %02x", _mm256_extract_epi8(n, 24));
395 printf(" %02x", _mm256_extract_epi8(n, 25));
396 printf(" %02x", _mm256_extract_epi8(n, 26));
397 printf(" %02x", _mm256_extract_epi8(n, 27));
398 printf(" %02x", _mm256_extract_epi8(n, 28));
399 printf(" %02x", _mm256_extract_epi8(n, 29));
400 printf(" %02x", _mm256_extract_epi8(n, 30));
401 printf(" %02x", _mm256_extract_epi8(n, 31));
406 // Does a memcpy and memchr in one to reduce processing time.
407 // Note that the benefit is somewhat limited if your L3 cache is small,
408 // as you'll (unfortunately) spend most of the time loading the data
411 // Complicated cases are left to the slow path; it basically stops copying
412 // up until the first instance of "sync_char" (usually a bit before, actually).
413 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
414 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
//   current_frame: frame being filled; len is advanced by the bytes copied.
//   start, limit:  input range [start, limit) to consume.
//   sync_char:     first byte of the sync pattern; copying stops at the vector
//                  block containing it.
// On the path visible at the end of the function, the return value is the input
// position where the vector loop stopped; decode_packs() continues from there
// on the slow path. Return values of the early-out branches are on lines not
// visible here.
// NOTE(review): an AVX2 and an SSE variant of the copy loops both appear below;
// presumably preprocessor lines that are not visible select one — confirm.
415 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
417 if (current_frame->data == nullptr ||
418 current_frame->len > current_frame->size ||
422 size_t orig_bytes = limit - start;
// Short inputs take an early-out branch instead of the vector loops.
423 if (orig_bytes < 128) {
428 // Don't read more bytes than we can write.
429 limit = min(limit, start + (current_frame->size - current_frame->len));
431 // Align end to 32 bytes.
432 limit = (const uint8_t *)(intptr_t(limit) & ~31);
434 if (start >= limit) {
438 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
439 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
440 if (aligned_start != start) {
// The unaligned head goes through the slow path, stopping early if it already
// contains sync_char.
441 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
442 if (sync_start == nullptr) {
443 add_to_frame(current_frame, "", start, aligned_start);
445 add_to_frame(current_frame, "", start, sync_start);
450 // Make the length a multiple of 64.
451 if (current_frame->interleaved) {
452 if (((limit - aligned_start) % 64) != 0) {
455 assert(((limit - aligned_start) % 64) == 0);
// AVX2 variant: 32-byte vectors; the interleaved loop reads two vectors (64 bytes) per iteration.
459 const __m256i needle = _mm256_set1_epi8(sync_char);
461 const __restrict __m256i *in = (const __m256i *)aligned_start;
462 if (current_frame->interleaved) {
463 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
464 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
465 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane, gathers even-indexed bytes into the low half and
// odd-indexed bytes into the high half.
469 __m256i shuffle_cw = _mm256_set_epi8(
470 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
471 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
472 while (in < (const __m256i *)limit) {
473 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
474 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
475 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
477 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
478 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
479 __m256i found = _mm256_or_si256(found1, found2);
481 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
482 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
484 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
485 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo = the low 128-bit halves of data1 and data2; hi = their high halves.
487 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
488 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
490 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
491 _mm256_storeu_si256(out2, hi);
// True when any byte in this 64-byte block equals sync_char.
493 if (!_mm256_testz_si256(found, found)) {
501 current_frame->len += (uint8_t *)in - aligned_start;
503 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
504 while (in < (const __m256i *)limit) {
505 __m256i data = _mm256_load_si256(in);
506 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
507 __m256i found = _mm256_cmpeq_epi8(data, needle);
508 if (!_mm256_testz_si256(found, found)) {
515 current_frame->len = (uint8_t *)out - current_frame->data;
// SSE variant of the same two loops, using 16-byte vectors.
518 const __m128i needle = _mm_set1_epi8(sync_char);
520 const __m128i *in = (const __m128i *)aligned_start;
521 if (current_frame->interleaved) {
522 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
523 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
524 if (current_frame->len % 2 == 1) {
528 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
529 while (in < (const __m128i *)limit) {
530 __m128i data1 = _mm_load_si128(in);
531 __m128i data2 = _mm_load_si128(in + 1);
// Low byte of each 16-bit word (even-indexed input bytes) and high byte
// (odd-indexed input bytes) are separated, then packed back down to bytes.
532 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
533 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
534 __m128i data1_hi = _mm_srli_epi16(data1, 8);
535 __m128i data2_hi = _mm_srli_epi16(data2, 8);
536 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
537 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
538 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
539 _mm_storeu_si128(out2, hi);
540 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
541 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
542 if (!_mm_testz_si128(found1, found1) ||
543 !_mm_testz_si128(found2, found2)) {
551 current_frame->len += (uint8_t *)in - aligned_start;
553 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
554 while (in < (const __m128i *)limit) {
555 __m128i data = _mm_load_si128(in);
556 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
557 __m128i found = _mm_cmpeq_epi8(data, needle);
558 if (!_mm_testz_si128(found, found)) {
565 current_frame->len = (uint8_t *)out - current_frame->data;
569 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
571 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload
// to *current_frame, and splits frames at each occurrence of the sync pattern.
//   xfr:             completed transfer.
//   sync_pattern:    byte sequence marking the start of a new frame/block; its
//                    length is sync_length (declared on a line not visible here).
//   current_frame:   frame currently being filled.
//   frame_type_name: label forwarded to add_to_frame() for log messages.
//   start_callback:  invoked with a pointer just past each sync pattern found.
// Packets whose status is not LIBUSB_TRANSFER_COMPLETED are reported on stderr.
575 void decode_packs(const libusb_transfer *xfr,
576 const char *sync_pattern,
578 FrameAllocator::Frame *current_frame,
579 const char *frame_type_name,
580 function<void(const uint8_t *start)> start_callback)
583 for (int i = 0; i < xfr->num_iso_packets; i++) {
584 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
586 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
587 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
592 const uint8_t *start = xfr->buffer + offset;
593 const uint8_t *limit = start + pack->actual_length;
594 while (start < limit) { // Usually runs only one iteration.
// The fast path copies up to (about) the first byte that could begin a sync pattern.
596 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
597 if (start == limit) break;
598 assert(start < limit);
// Slow path: look for the full sync pattern in the remainder of the packet.
601 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
602 if (start_next_frame == nullptr) {
603 // add the rest of the buffer
604 add_to_frame(current_frame, frame_type_name, start, limit);
607 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
608 start = start_next_frame + sync_length; // skip sync
609 start_callback(start);
613 dump_pack(xfr, offset, pack);
// Advance by the packet's slot size (length), not by the bytes actually received.
615 offset += pack->length;
// libusb completion callback shared by the isochronous and control transfers.
// A transfer that did not complete is reported and freed. For isochronous
// transfers, endpoint 0x84 is decoded as audio and the other branch as video
// (after which the transfer is re-sliced for the current assumed width). For
// control transfers the four returned bytes are printed / stored in
// register_file and the next register read is set up. The final visible step
// re-submits the transfer and reports a failure to do so.
// NOTE(review): `setup` (its declaration is commented out below) and `i` are
// used without a visible declaration; those lines are presumably inside
// preprocessor-disabled sections that are not visible here — confirm.
619 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
621 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
622 fprintf(stderr, "transfer status %d\n", xfr->status);
623 libusb_free_transfer(xfr);
627 assert(xfr->user_data != nullptr);
628 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
630 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
631 if (xfr->endpoint == 0x84) {
632 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
634 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
636 // Update the transfer with the new assumed width, if we're in the process of changing formats.
637 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
640 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
641 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
642 uint8_t *buf = libusb_control_transfer_get_data(xfr);
644 if (setup->wIndex == 44) {
645 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
647 printf("read register %2d: 0x%02x%02x%02x%02x\n",
648 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store this register's four bytes, then step to the next one, wrapping at NUM_BMUSB_REGISTERS.
651 memcpy(usb->register_file + usb->current_register, buf, 4);
652 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
653 if (usb->current_register == 0) {
654 // read through all of them
655 printf("register dump:");
656 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
657 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
661 libusb_fill_control_setup(xfr->buffer,
662 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
663 /*index=*/usb->current_register, /*length=*/4);
668 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
669 for (i = 0; i < xfr->actual_length; i++) {
670 printf("%02x", xfr->buffer[i]);
680 int rc = libusb_submit_transfer(xfr);
682 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Body of the USB event thread.
// Requests round-robin realtime scheduling (SCHED_RR, priority 1) for the
// calling thread, printing a message if that fails, and then pumps libusb
// events until should_quit is set or libusb_handle_events() reports an error.
// NOTE(review): "¶m" below looks like a mis-encoded "&param" (the
// address of the sched_param variable) — confirm against the real file.
687 void BMUSBCapture::usb_thread_func()
690 memset(¶m, 0, sizeof(param));
691 param.sched_priority = 1;
692 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
693 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
695 while (!should_quit) {
696 int rc = libusb_handle_events(nullptr);
697 if (rc != LIBUSB_SUCCESS)
// One supported card found during enumeration in open_card(). Per the
// initializer used there, the fields are product id, bus number, port number
// and the libusb device (the first three are declared on lines not visible here).
702 struct USBCardDevice {
705 libusb_device *device;
// Enumerates USB devices, keeps those with vendor 0x1edb and product 0xbd3b
// (Intensity Shuttle) or 0xbd4f (UltraStudio SDI), sorts them by product, bus
// and port for a stable ordering, lists them on stderr, and opens the one at
// position card_index.
//   card_index: zero-based index into the sorted list of supported cards.
// Failures are reported on stderr; the returns are on lines not visible here.
// The device list is freed without unreferencing its entries (second argument
// 0), so references are dropped explicitly: non-matching devices right away,
// matching ones at the end of the function.
708 libusb_device_handle *open_card(int card_index)
710 libusb_device **devices;
711 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
712 if (num_devices == -1) {
713 fprintf(stderr, "Error finding USB devices\n");
716 vector<USBCardDevice> found_cards;
717 for (ssize_t i = 0; i < num_devices; ++i) {
718 libusb_device_descriptor desc;
719 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
720 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
724 uint8_t bus = libusb_get_bus_number(devices[i]);
725 uint8_t port = libusb_get_port_number(devices[i]);
727 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
728 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
729 libusb_unref_device(devices[i]);
733 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
735 libusb_free_device_list(devices, 0);
737 // Sort the devices to get a consistent ordering.
738 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
739 if (a.product != b.product)
740 return a.product < b.product;
742 return a.bus < b.bus;
743 return a.port < b.port;
746 for (size_t i = 0; i < found_cards.size(); ++i) {
// NOTE(review): the value printed after "Device" is the port number.
747 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
748 if (found_cards[i].product == 0xbd3b) {
749 fprintf(stderr, "Intensity Shuttle\n");
750 } else if (found_cards[i].product == 0xbd4f) {
751 fprintf(stderr, "UltraStudio SDI\n");
// The size_t cast makes a negative card_index fail this range check as well.
757 if (size_t(card_index) >= found_cards.size()) {
758 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
762 libusb_device_handle *devh;
763 int rc = libusb_open(found_cards[card_index].device, &devh);
765 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
769 for (size_t i = 0; i < found_cards.size(); ++i) {
770 libusb_unref_device(found_cards[i].device);
// Prepares the capture device for use. In order, the visible lines:
//   - install default MallocFrameAllocators if none have been set;
//   - start the dequeue thread;
//   - initialize libusb and open the card selected by card_index;
//   - print the interface / alternate setting / endpoint layout;
//   - select configuration 1 and claim interfaces 0 and 3;
//   - switch alternate settings (1, 2, 1 in the visible lines);
//   - issue the vendor control requests listed in ctrls[];
//   - set up asynchronous control transfers for registers;
//   - open audio.raw for the audio dump;
//   - allocate isochronous transfers for endpoints 3 and 4 and store them in
//     iso_xfrs (they are submitted by start_bm_capture()).
// Each libusb failure is reported on stderr.
// NOTE(review): some sections below may be disabled by preprocessor lines that
// are not visible here, so not every step listed necessarily runs.
776 void BMUSBCapture::configure_card()
778 if (video_frame_allocator == nullptr) {
779 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
781 if (audio_frame_allocator == nullptr) {
782 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
784 dequeue_thread_should_quit = false;
785 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
788 struct libusb_transfer *xfr;
790 rc = libusb_init(nullptr);
792 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
796 libusb_device_handle *devh = open_card(card_index);
798 fprintf(stderr, "Error finding USB device\n");
802 libusb_config_descriptor *config;
803 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
805 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
808 printf("%d interface\n", config->bNumInterfaces);
809 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
810 printf(" interface %d\n", interface_number);
811 const libusb_interface *interface = &config->interface[interface_number];
812 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
813 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
814 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
815 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
816 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
817 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
822 rc = libusb_set_configuration(devh, /*configuration=*/1);
824 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
828 rc = libusb_claim_interface(devh, 0);
830 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
834 // Alternate setting 1 is output, alternate setting 2 is input.
835 // Card is reset when switching alternates, so the driver uses
836 // this “double switch” when it wants to reset.
838 // There's also alternate settings 3 and 4, which seem to be
839 // like 1 and 2 except they advertise less bandwidth needed.
840 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
842 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
845 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
847 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
851 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
853 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
859 rc = libusb_claim_interface(devh, 3);
861 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
867 // 44 is some kind of timer register (first 16 bits count upwards)
868 // 24 is some sort of watchdog?
869 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
870 // (or will go to 0x73c60010?), also seen 0x73c60100
871 // 12 also changes all the time, unclear why
872 // 16 seems to be autodetected mode somehow
873 // -- this is e00115e0 after reset?
874 // ed0115e0 after mode change [to output?]
875 // 2d0015e0 after more mode change [to input]
876 // ed0115e0 after more mode change
877 // 2d0015e0 after more mode change
879 // 390115e0 seems to indicate we have signal
880 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
882 // 200015e0 on startup
883 // changes to 250115e0 when we sync to the signal
885 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
887 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
889 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
890 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
892 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
893 // perhaps some of them are related to analog output?
895 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
896 // but the driver sets it to 0x8036802a at some point.
898 // all of this is on request 214/215. other requests (192, 219,
899 // 222, 223, 224) are used for firmware upgrade. Probably best to
900 // stay out of it unless you know what you're doing.
904 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
907 // 0x01 - stable signal
909 // 0x08 - unknown (audio??)
// Each entry: direction, request number (214 is used with IN, 215 with OUT),
// register index, and the 32-bit value to send.
919 static const ctrl ctrls[] = {
920 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
921 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
923 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
925 // clearing the 0x08000000 bit seems to change the capture format (other source?)
926 // 0x10000000 = analog audio instead of embedded audio, it seems
927 // 0x3a000000 = component video? (analog audio)
928 // 0x3c000000 = composite video? (analog audio)
929 // 0x3e000000 = s-video? (analog audio)
930 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
931 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
932 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
933 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
934 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
937 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The value is converted to network (big-endian) byte order before being sent.
938 uint32_t flipped = htonl(ctrls[req].data);
939 static uint8_t value[4];
940 memcpy(value, &flipped, sizeof(flipped));
941 int size = sizeof(value);
942 //if (ctrls[req].request == 215) size = 0;
943 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
944 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
946 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
950 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
951 for (int i = 0; i < rc; ++i) {
952 printf("%02x", value[i]);
960 static int my_index = 0;
961 static uint8_t value[4];
962 int size = sizeof(value);
963 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
964 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
966 fprintf(stderr, "Error on control\n");
969 printf("rc=%d index=%d: 0x", rc, my_index);
970 for (int i = 0; i < rc; ++i) {
971 printf("%02x", value[i]);
978 // set up an asynchronous transfer of the timer register
979 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
980 static int completed = 0;
982 xfr = libusb_alloc_transfer(0);
983 libusb_fill_control_setup(cmdbuf,
984 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
985 /*index=*/44, /*length=*/4);
986 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Replaces the user_data passed to the fill call above; cb_xfr() casts user_data to BMUSBCapture *.
987 xfr->user_data = this;
988 libusb_submit_transfer(xfr);
990 // set up an asynchronous transfer of register 24
991 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
992 static int completed2 = 0;
994 xfr = libusb_alloc_transfer(0);
995 libusb_fill_control_setup(cmdbuf2,
996 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
997 /*index=*/24, /*length=*/4);
998 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
999 xfr->user_data = this;
1000 libusb_submit_transfer(xfr);
1003 // set up an asynchronous transfer of the register dump
1004 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1005 static int completed3 = 0;
1007 xfr = libusb_alloc_transfer(0);
1008 libusb_fill_control_setup(cmdbuf3,
1009 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1010 /*index=*/current_register, /*length=*/4);
1011 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1012 xfr->user_data = this;
1013 //libusb_submit_transfer(xfr);
// Output file written by dump_audio_block().
1015 audiofp = fopen("audio.raw", "wb");
1017 // set up isochronous transfers for audio and video
1018 for (int e = 3; e <= 4; ++e) {
1019 //int num_transfers = (e == 3) ? 6 : 6;
1020 int num_transfers = 10;
1021 for (int i = 0; i < num_transfers; ++i) {
1023 int num_iso_pack, size;
1025 // Allocate for minimum width (because that will give us the most
1026 // number of packets, so we don't need to reallocate), but we'll
1027 // default to 720p for the first frame.
1028 size = find_xfer_size_for_width(MIN_WIDTH);
1029 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1030 buf_size = USB_VIDEO_TRANSFER_SIZE;
1034 buf_size = num_iso_pack * size;
1036 assert(size_t(num_iso_pack * size) <= buf_size);
1037 uint8_t *buf = new uint8_t[buf_size];
1039 xfr = libusb_alloc_transfer(num_iso_pack);
1041 fprintf(stderr, "oom\n");
1045 int ep = LIBUSB_ENDPOINT_IN | e;
1046 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1047 num_iso_pack, cb_xfr, nullptr, 0);
1048 libusb_set_iso_packet_lengths(xfr, size);
1049 xfr->user_data = this;
1052 change_xfer_size_for_width(assumed_frame_width, xfr);
1055 iso_xfrs.push_back(xfr);
// Starts capturing by submitting every transfer stored in iso_xfrs, reporting
// the endpoint and libusb error name on stderr when a submission fails.
// NOTE(review): `i` in the error message is declared on a line not visible
// here, and the release/exit calls at the end may sit in a preprocessor-
// disabled section — confirm against the real file.
1060 void BMUSBCapture::start_bm_capture()
1062 printf("starting capture\n");
1064 for (libusb_transfer *xfr : iso_xfrs) {
1065 printf("submitting transfer...\n");
1066 int rc = libusb_submit_transfer(xfr);
1069 //printf("num_bytes=%d\n", num_bytes);
1070 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1071 xfr->endpoint, i, libusb_error_name(rc));
1078 libusb_release_interface(devh, 0);
1082 libusb_exit(nullptr);
// Asks the dequeue thread to exit and waits for it: sets the quit flag, wakes
// any waiter on queues_not_empty so it re-checks the flag, then joins the thread.
1087 void BMUSBCapture::stop_dequeue_thread()
1089 dequeue_thread_should_quit = true;
1090 queues_not_empty.notify_all();
1091 dequeue_thread.join();
// Clears should_quit and starts the USB event thread running usb_thread_func().
// No object pointer is passed to the thread constructor, so usb_thread_func is
// presumably a static member — confirm against the class declaration.
1094 void BMUSBCapture::start_bm_thread()
1096 should_quit = false;
1097 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread(). NOTE(review): the body is on lines not
// visible here; presumably it sets should_quit and joins usb_thread — confirm.
1100 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table in decode_video_format(): a normalized format
// code and the mode it maps to. (The width, height and interlaced fields read
// by decode_video_format() are declared on lines not visible here.)
1106 struct VideoFormatEntry {
// Format code after decode_video_format() has masked off the 0xe808 bits.
1107 uint16_t normalized_video_format;
// Frame rate as the fraction frame_rate_nom / frame_rate_den.
1109 int frame_rate_nom, frame_rate_den;
// Translates the 16-bit format word from a frame header into video parameters.
//   video_format: raw format code.
//   width, height, frame_rate_nom, frame_rate_den, interlaced: outputs.
// Handled in order: 0x0800 (no signal), codes without the 0xe800 bits set
// (reported as not being a video format), the NTSC and PAL special cases, and
// then a table lookup on the code with the 0xe808 bits cleared. Unknown codes
// are logged and given 60/1 as the frame rate.
// NOTE(review): the return statements and the width/height assignments of the
// special cases are on lines not visible here. start_new_frame() treats a true
// result as "width is valid"; presumably false is returned for the
// unrecognized cases — confirm.
1113 bool decode_video_format(uint16_t video_format, int *width, int *height, int *frame_rate_nom, int *frame_rate_den, bool *interlaced)
1115 *interlaced = false;
1117 if (video_format == 0x0800) {
1118 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1119 // It's a strange thing, but what can you do.
1122 *frame_rate_nom = 3013;
1123 *frame_rate_den = 100;
1126 if ((video_format & 0xe800) != 0xe800) {
1127 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1131 *frame_rate_nom = 60;
1132 *frame_rate_den = 1;
1136 // NTSC (480i59.94, I suppose). A special case, see below.
1137 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1140 *frame_rate_nom = 60000;
1141 *frame_rate_den = 1001;
1146 // PAL (576i50, I suppose). A special case, see below.
1147 if (video_format == 0xe909) {
1150 *frame_rate_nom = 50;
1151 *frame_rate_den = 1;
1156 // 0x8 seems to be a flag about availability of deep color on the input,
1157 // except when it's not (e.g. it's the only difference between NTSC
1158 // and PAL). Rather confusing. But we clear it here nevertheless, because
1159 // usually it doesn't mean anything.
1160 uint16_t normalized_video_format = video_format & ~0xe808;
// Columns: normalized code, width, height, frame rate numerator, denominator, interlaced.
1161 constexpr VideoFormatEntry entries[] = {
1162 { 0x0143, 1280, 720, 50, 1, false }, // 720p50.
1163 { 0x0103, 1280, 720, 60, 1, false }, // 720p60.
1164 { 0x0121, 1280, 720, 60000, 1001, false }, // 720p59.94.
1165 { 0x01c3, 1920, 1080, 30, 1, false }, // 1080p30.
1166 { 0x0003, 1920, 1080, 30, 1, true }, // 1080i60.
1167 { 0x01e1, 1920, 1080, 30000, 1001, false }, // 1080p29.97.
1168 { 0x0021, 1920, 1080, 30000, 1001, true }, // 1080i59.94.
1169 { 0x0063, 1920, 1080, 25, 1, false }, // 1080p25.
1170 { 0x0043, 1920, 1080, 25, 1, true }, // 1080i50.
1171 { 0x008e, 1920, 1080, 24, 1, false }, // 1080p24.
1172 { 0x00a1, 1920, 1080, 24000, 1001, false }, // 1080p23.98.
1174 for (const VideoFormatEntry &entry : entries) {
1175 if (normalized_video_format == entry.normalized_video_format) {
1176 *width = entry.width;
1177 *height = entry.height;
1178 *frame_rate_nom = entry.frame_rate_nom;
1179 *frame_rate_den = entry.frame_rate_den;
1180 *interlaced = entry.interlaced;
1185 printf("Unknown video format 0x%04x. Assuming 720p60.\n", video_format);
1188 *frame_rate_nom = 60;
1189 *frame_rate_den = 1;