1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
35 using namespace std::placeholders;
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
42 #define FRAME_SIZE (8 << 20) // 8 MB.
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
50 atomic<bool> should_quit;
// Computes the isochronous packet size, in bytes, to use for a given frame
// width. The visible code starts from width * 2 * 6 and then tests whether
// that is a multiple of 1024; the rounding step itself is on lines not
// visible in this excerpt.
52 int find_xfer_size_for_width(int width)
54 // Video seems to require isochronous packets scaled with the width;
55 // seemingly six lines is about right, rounded up to the required 1kB
// Presumably 2 bytes per pixel (8-bit UYVY) times six lines.
57 int size = width * 2 * 6;
58 // Note that for 10-bit input, you'll need to increase size accordingly.
59 //size = size * 4 / 3;
60 if (size % 1024 != 0) {
// Re-slices an existing iso transfer for a new assumed frame width.
// Recomputes the per-packet size, derives how many packets of that size fit
// in xfr->length, and updates the transfer only when either value differs
// from what it currently holds.
//   width: assumed frame width; asserted to be at least MIN_WIDTH.
//   xfr:   the transfer whose packet count and packet lengths are adjusted.
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
69 assert(width >= MIN_WIDTH);
70 size_t size = find_xfer_size_for_width(width);
71 int num_iso_pack = xfr->length / size;
// Only the first descriptor's length is compared against the new size.
72 if (num_iso_pack != xfr->num_iso_packets ||
73 size != xfr->iso_packet_desc[0].length) {
// num_iso_packets is written first because libusb_set_iso_packet_lengths()
// applies the length to that many packets.
74 xfr->num_iso_packets = num_iso_pack;
75 libusb_set_iso_packet_lengths(xfr, size);
// Empty out-of-line destructor definition for FrameAllocator.
81 FrameAllocator::~FrameAllocator() {}
// Pre-allocates the whole frame pool up front: num_queued_frames buffers of
// frame_size bytes each are created and pushed onto the freelist.
83 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
84 : frame_size(frame_size)
86 for (size_t i = 0; i < num_queued_frames; ++i) {
87 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist, under freelist_mutex.
// When the freelist is empty an overrun message is printed; what is returned
// in that case is on lines not visible in this excerpt.
91 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
96 unique_lock<mutex> lock(freelist_mutex); // Meh.
97 if (freelist.empty()) {
// NOTE(review): the argument is not visible here; if it is the size_t
// frame_size, "%zu" would be the matching format specifier, not "%ld".
98 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() hands ownership of the buffer to the raw vf.data pointer, so the
// pop() below destroys only an empty unique_ptr.
101 vf.data = freelist.top().release();
102 vf.size = frame_size;
103 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist. A non-zero frame.overflow is
// reported first; the freelist push itself is done under freelist_mutex.
108 void MallocFrameAllocator::release_frame(Frame frame)
110 if (frame.overflow > 0) {
111 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
113 unique_lock<mutex> lock(freelist_mutex);
// Re-wraps the raw pointer so the freelist owns the buffer again.
114 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that may wrap around. Both visible
// returns report true when the forward distance from a to b is below 0x8000.
// The conditions selecting between the two returns are on lines not visible
// in this excerpt.
117 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
122 return (b - a < 0x8000);
// Lifts b by 0x10000 before subtracting a.
124 int wrap_b = 0x10000 + int(b);
125 return (wrap_b - a < 0x8000);
// Appends a finished frame to queue *q under queue_lock and signals
// queues_not_empty.
// If the queue is non-empty and the last queued timecode is not "less than"
// the new one (wraparound-aware), the frame is reported as going backwards
// and released back to its owner.
129 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
131 unique_lock<mutex> lock(queue_lock);
132 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
133 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
134 q->back().timecode, timecode);
135 frame.owner->release_frame(frame);
141 qf.timecode = timecode;
143 q->push_back(move(qf));
144 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes a frame's payload to `filename`, skipping the first
// HEADER_SIZE bytes, and prints "short write!" if fwrite() does not report
// one full item written.
147 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
// NOTE(review): the fopen() result is passed to fwrite() on the very next
// line without a null check; a failed open would pass a null FILE*.
149 FILE *fp = fopen(filename, "wb");
150 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
151 printf("short write!\n");
// Debug helper: appends an audio block's payload to the audiofp stream,
// skipping the first AUDIO_HEADER_SIZE bytes. The fwrite() result is not
// checked.
156 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
158 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Waits until both pending queues hold at least
// one entry, then compares the front video and audio timecodes and either
// drops an unmatched video frame, delivers an unmatched audio block with an
// empty video frame, or delivers the two together through frame_callback.
// dequeue_init_callback / dequeue_cleanup_callback are called at the visible
// start and end when has_dequeue_callbacks is set.
161 void BMUSBCapture::dequeue_thread_func()
163 if (has_dequeue_callbacks) {
164 dequeue_init_callback();
166 while (!dequeue_thread_should_quit) {
167 unique_lock<mutex> lock(queue_lock);
// Wake on shutdown, or once *both* queues have something to pair up.
168 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
170 if (dequeue_thread_should_quit) break;
172 uint16_t video_timecode = pending_video_frames.front().timecode;
173 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// The audio format is fixed here to 24 bits per sample, 8 channels.
174 AudioFormat audio_format;
175 audio_format.bits_per_sample = 24;
176 audio_format.num_channels = 8;
// Video is ahead of audio in the queue: the video frame is popped and
// released back to video_frame_allocator.
177 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
178 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
180 QueuedFrame video_frame = pending_video_frames.front();
181 pending_video_frames.pop_front();
183 video_frame_allocator->release_frame(video_frame.frame);
// Audio is ahead of video: the audio block is popped and delivered with a
// default-constructed video frame.
184 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
185 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
187 QueuedFrame audio_frame = pending_audio_frames.front();
188 pending_audio_frames.pop_front();
190 audio_format.id = audio_frame.format;
192 // Use the video format of the pending frame.
// The video frame is only copied here, not popped from its queue.
193 QueuedFrame video_frame = pending_video_frames.front();
194 VideoFormat video_format;
195 decode_video_format(video_frame.format, &video_format);
197 frame_callback(audio_timecode,
198 FrameAllocator::Frame(), 0, video_format,
199 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Both fronts are popped and delivered together. Presumably this is the
// remaining branch (neither timecode precedes the other); the branch header
// is not visible here.
201 QueuedFrame video_frame = pending_video_frames.front();
202 QueuedFrame audio_frame = pending_audio_frames.front();
203 pending_audio_frames.pop_front();
204 pending_video_frames.pop_front();
// Debug dump of the pair to disk; any guard around these lines is not
// visible in this excerpt.
209 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
210 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
211 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
214 VideoFormat video_format;
215 audio_format.id = audio_frame.format;
// If the format decodes, the real video frame is passed on with a
// HEADER_SIZE offset; the other visible call passes an empty video frame.
216 if (decode_video_format(video_frame.format, &video_format)) {
217 frame_callback(video_timecode,
218 video_frame.frame, HEADER_SIZE, video_format,
219 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
221 frame_callback(video_timecode,
222 FrameAllocator::Frame(), 0, video_format,
223 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
227 if (has_dequeue_callbacks) {
228 dequeue_cleanup_callback();
// Called with a pointer to the bytes following a video sync marker.
// Reads timecode (bytes 0-1) and format (bytes 2-3), both little-endian
// 16-bit. If the frame being assembled has data it is queued on
// pending_video_frames, and a fresh frame is then taken from
// video_frame_allocator.
232 void BMUSBCapture::start_new_frame(const uint8_t *start)
234 uint16_t format = (start[3] << 8) | start[2];
235 uint16_t timecode = (start[1] << 8) | start[0];
237 if (current_video_frame.len > 0) {
238 // If format is 0x0800 (no signal), add a fake (empty) audio
239 // frame to get it out of the queue.
240 // TODO: Figure out if there are other formats that come with
241 // no audio, and treat them the same.
242 if (format == 0x0800) {
243 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
244 if (fake_audio_frame.data == nullptr) {
245 // Oh well, it's just a no-signal frame anyway.
246 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
// The video frame is handed back to its owner and replaced with a new one.
247 current_video_frame.owner->release_frame(current_video_frame);
248 current_video_frame = video_frame_allocator->alloc_frame();
251 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
254 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
256 // Update the assumed frame width. We might be one frame too late on format changes,
257 // but it's much better than asking the user to choose manually.
258 VideoFormat video_format;
259 if (decode_video_format(format, &video_format)) {
260 assumed_frame_width = video_format.width;
263 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
265 // //start[7], start[6], start[5], start[4],
266 // read_current_frame, FRAME_SIZE);
268 current_video_frame = video_frame_allocator->alloc_frame();
269 //if (current_video_frame.data == nullptr) {
270 // read_current_frame = -1;
272 // read_current_frame = 0;
// Called with a pointer to the bytes following an audio sync marker.
// Reads timecode (bytes 0-1) and format (bytes 2-3), both little-endian
// 16-bit. If the block being assembled has data it is queued on
// pending_audio_frames, and a fresh frame is then taken from
// audio_frame_allocator.
276 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
278 uint16_t format = (start[3] << 8) | start[2];
279 uint16_t timecode = (start[1] << 8) | start[0];
280 if (current_audio_frame.len > 0) {
281 //dump_audio_block();
282 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
284 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
285 // format, timecode, read_current_audio_block);
286 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one iso packet as hex.
// Reads pack->actual_length bytes from xfr->buffer starting at `offset`.
// The separators emitted between bytes are on lines not fully visible here.
290 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
292 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
293 for (unsigned j = 0; j < pack->actual_length; j++) {
294 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
295 printf("%02x", xfr->buffer[j + offset]);
298 else if ((j % 8) == 7)
// Copies n bytes from src while splitting them between dest1 and dest2.
// The loop advances two source bytes per iteration; the per-iteration stores
// are on lines not visible here. Presumably even-indexed bytes go to dest1
// and odd-indexed bytes to dest2 — TODO confirm against the full source.
306 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
309 uint8_t *dptr1 = dest1;
310 uint8_t *dptr2 = dest2;
312 for (size_t i = 0; i < n; i += 2) {
// Slow-path append of the byte range [start, end) to *current_frame.
// A first check tests for a null data pointer or len > size (plus a further
// condition not visible here); its body is also not visible.
// Data that would not fit is not copied: len is pinned to size and the
// excess is recorded in overflow. Interleaved frames are split between data
// and data2; other frames get a plain memcpy at data + len.
//   frame_type_name: used only in the overflow log message.
318 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
320 if (current_frame->data == nullptr ||
321 current_frame->len > current_frame->size ||
326 int bytes = end - start;
327 if (current_frame->len + bytes > current_frame->size) {
328 current_frame->overflow = current_frame->len + bytes - current_frame->size;
329 current_frame->len = current_frame->size;
// Overflows above 1 MiB are logged and the counter is reset.
330 if (current_frame->overflow > 1048576) {
331 printf("%d bytes overflow after last %s frame\n",
332 int(current_frame->overflow), frame_type_name);
333 current_frame->overflow = 0;
337 if (current_frame->interleaved) {
// Each destination holds half of the bytes written so far.
338 uint8_t *data = current_frame->data + current_frame->len / 2;
339 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Special handling for an odd current length and for an odd byte count;
// the bodies of these two branches are only partly visible here.
340 if (current_frame->len % 2 == 1) {
344 if (bytes % 2 == 1) {
347 ++current_frame->len;
350 memcpy_interleaved(data, data2, start, bytes);
351 current_frame->len += bytes;
353 memcpy(current_frame->data + current_frame->len, start, bytes);
354 current_frame->len += bytes;
// Debug helper: prints `name` left-aligned in a 10-character column, then all
// 32 bytes of the AVX2 register n as two-digit hex, lowest byte first.
// Each byte is extracted with its own _mm256_extract_epi8 call, each taking
// a literal index.
362 void avx2_dump(const char *name, __m256i n)
364 printf("%-10s:", name);
// Bytes 0-7.
365 printf(" %02x", _mm256_extract_epi8(n, 0));
366 printf(" %02x", _mm256_extract_epi8(n, 1));
367 printf(" %02x", _mm256_extract_epi8(n, 2));
368 printf(" %02x", _mm256_extract_epi8(n, 3));
369 printf(" %02x", _mm256_extract_epi8(n, 4));
370 printf(" %02x", _mm256_extract_epi8(n, 5));
371 printf(" %02x", _mm256_extract_epi8(n, 6));
372 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
374 printf(" %02x", _mm256_extract_epi8(n, 8));
375 printf(" %02x", _mm256_extract_epi8(n, 9));
376 printf(" %02x", _mm256_extract_epi8(n, 10));
377 printf(" %02x", _mm256_extract_epi8(n, 11));
378 printf(" %02x", _mm256_extract_epi8(n, 12));
379 printf(" %02x", _mm256_extract_epi8(n, 13));
380 printf(" %02x", _mm256_extract_epi8(n, 14));
381 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
383 printf(" %02x", _mm256_extract_epi8(n, 16));
384 printf(" %02x", _mm256_extract_epi8(n, 17));
385 printf(" %02x", _mm256_extract_epi8(n, 18));
386 printf(" %02x", _mm256_extract_epi8(n, 19));
387 printf(" %02x", _mm256_extract_epi8(n, 20));
388 printf(" %02x", _mm256_extract_epi8(n, 21));
389 printf(" %02x", _mm256_extract_epi8(n, 22));
390 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
392 printf(" %02x", _mm256_extract_epi8(n, 24));
393 printf(" %02x", _mm256_extract_epi8(n, 25));
394 printf(" %02x", _mm256_extract_epi8(n, 26));
395 printf(" %02x", _mm256_extract_epi8(n, 27));
396 printf(" %02x", _mm256_extract_epi8(n, 28));
397 printf(" %02x", _mm256_extract_epi8(n, 29));
398 printf(" %02x", _mm256_extract_epi8(n, 30));
399 printf(" %02x", _mm256_extract_epi8(n, 31));
404 // Does a memcpy and memchr in one to reduce processing time.
405 // Note that the benefit is somewhat limited if your L3 cache is small,
406 // as you'll (unfortunately) spend most of the time loading the data
409 // Complicated cases are left to the slow path; it basically stops copying
410 // up until the first instance of "sync_char" (usually a bit before, actually).
411 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
412 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame: destination frame; len is advanced by the bytes copied.
//   start, limit:  source range to consume.
//   sync_char:     byte value that stops the fast copy when seen.
// The final visible return yields the SIMD read cursor, i.e. the first source
// byte this function did not consume. Several early-exit branches have bodies
// that are not visible in this excerpt. The file contains both a 256-bit and
// a 128-bit variant of the loops; the selection between them is not visible.
413 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
415 if (current_frame->data == nullptr ||
416 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes take a separate branch (body not visible).
420 size_t orig_bytes = limit - start;
421 if (orig_bytes < 128) {
426 // Don't read more bytes than we can write.
427 limit = min(limit, start + (current_frame->size - current_frame->len));
429 // Align end to 32 bytes.
430 limit = (const uint8_t *)(intptr_t(limit) & ~31);
432 if (start >= limit) {
436 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
437 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
438 if (aligned_start != start) {
// The unaligned head goes through the slow path; if it already contains
// sync_char, only the bytes before it are added.
439 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
440 if (sync_start == nullptr) {
441 add_to_frame(current_frame, "", start, aligned_start);
443 add_to_frame(current_frame, "", start, sync_start);
448 // Make the length a multiple of 64.
// (The interleaved loops below read two vectors per iteration.)
449 if (current_frame->interleaved) {
450 if (((limit - aligned_start) % 64) != 0) {
453 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit (AVX2) variant ----
457 const __m256i needle = _mm256_set1_epi8(sync_char);
459 const __restrict __m256i *in = (const __m256i *)aligned_start;
460 if (current_frame->interleaved) {
// Offsets are (len + 1) / 2 for data and len / 2 for data2.
461 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
462 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
463 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: gathers even-indexed bytes into the low 8 bytes and
// odd-indexed bytes into the high 8 bytes.
467 __m256i shuffle_cw = _mm256_set_epi8(
468 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
469 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
470 while (in < (const __m256i *)limit) {
471 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
472 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
473 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Look for sync_char in either input vector.
475 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
476 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
477 __m256i found = _mm256_or_si256(found1, found2);
479 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
480 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
482 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
483 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo takes the low 128-bit halves of data1/data2, hi the high halves.
485 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
486 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
488 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
489 _mm256_storeu_si256(out2, hi);
// Non-zero `found` means sync_char was seen (branch body not visible).
491 if (!_mm256_testz_si256(found, found)) {
// Account for everything the SIMD loop consumed.
499 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight copy of one 32-byte vector per iteration.
501 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
502 while (in < (const __m256i *)limit) {
503 __m256i data = _mm256_load_si256(in);
504 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
505 __m256i found = _mm256_cmpeq_epi8(data, needle);
506 if (!_mm256_testz_si256(found, found)) {
513 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit (SSE) variant ----
516 const __m128i needle = _mm_set1_epi8(sync_char);
518 const __m128i *in = (const __m128i *)aligned_start;
519 if (current_frame->interleaved) {
520 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
521 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
522 if (current_frame->len % 2 == 1) {
// Deinterleave by masking (low byte of each 16-bit word) and shifting (high
// byte), then packing two vectors of words back down to bytes.
526 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
527 while (in < (const __m128i *)limit) {
528 __m128i data1 = _mm_load_si128(in);
529 __m128i data2 = _mm_load_si128(in + 1);
530 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
531 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
532 __m128i data1_hi = _mm_srli_epi16(data1, 8);
533 __m128i data2_hi = _mm_srli_epi16(data2, 8);
534 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
535 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
536 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
537 _mm_storeu_si128(out2, hi);
538 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
539 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
540 if (!_mm_testz_si128(found1, found1) ||
541 !_mm_testz_si128(found2, found2)) {
549 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight copy of one 16-byte vector per iteration.
551 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
552 while (in < (const __m128i *)limit) {
553 __m128i data = _mm_load_si128(in);
554 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
555 __m128i found = _mm_cmpeq_epi8(data, needle);
556 if (!_mm_testz_si128(found, found)) {
563 current_frame->len = (uint8_t *)out - current_frame->data;
567 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
569 return (const uint8_t *)in;
// Walks every iso packet of a transfer, appending the received bytes to
// *current_frame and splitting the stream at each occurrence of sync_pattern.
//   sync_pattern / sync_length: byte sequence that marks the start of a new
//       frame (sync_length is declared on a line not visible here).
//   frame_type_name: label passed through to add_to_frame for log messages.
//   start_callback:  invoked with the pointer just past each sync pattern.
573 void decode_packs(const libusb_transfer *xfr,
574 const char *sync_pattern,
576 FrameAllocator::Frame *current_frame,
577 const char *frame_type_name,
578 function<void(const uint8_t *start)> start_callback)
581 for (int i = 0; i < xfr->num_iso_packets; i++) {
582 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete are reported on stderr.
584 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
585 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet's slot hold received data.
590 const uint8_t *start = xfr->buffer + offset;
591 const uint8_t *limit = start + pack->actual_length;
592 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy up to the first byte matching the pattern's first character.
594 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
595 if (start == limit) break;
596 assert(start < limit);
// Search the remainder for the full sync pattern.
599 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
600 if (start_next_frame == nullptr) {
601 // add the rest of the buffer
602 add_to_frame(current_frame, frame_type_name, start, limit);
// Pattern found: add the bytes before it, skip the pattern, start a new frame.
605 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
606 start = start_next_frame + sync_length; // skip sync
607 start_callback(start);
611 dump_pack(xfr, offset, pack);
// Advance by the packet's allotted length, not by the bytes actually received.
613 offset += pack->length;
// libusb completion callback used for both isochronous and control
// transfers. xfr->user_data holds the owning BMUSBCapture instance.
// A transfer whose status is not COMPLETED is logged and freed. Iso
// transfers are decoded into the current audio or video frame; control
// transfers feed the register file. The transfer is resubmitted near the end.
617 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
619 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
620 fprintf(stderr, "transfer status %d\n", xfr->status);
621 libusb_free_transfer(xfr);
625 assert(xfr->user_data != nullptr);
626 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
628 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio; the video decode below is presumably
// the else branch (its header is not visible here).
629 if (xfr->endpoint == 0x84) {
630 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
632 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
634 // Update the transfer with the new assumed width, if we're in the process of changing formats.
635 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
638 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
639 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
640 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): the lines below use `setup`, whose declaration above is
// commented out; presumably they sit in a disabled region not visible here.
642 if (setup->wIndex == 44) {
643 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
645 printf("read register %2d: 0x%02x%02x%02x%02x\n",
646 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the 4 bytes just read and step to the next register, wrapping at
// NUM_BMUSB_REGISTERS.
649 memcpy(usb->register_file + usb->current_register, buf, 4);
650 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
651 if (usb->current_register == 0) {
652 // read through all of them
653 printf("register dump:");
654 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
655 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the setup packet so the transfer reads the next register.
659 libusb_fill_control_setup(xfr->buffer,
660 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
661 /*index=*/usb->current_register, /*length=*/4);
// Raw hex dump of the transfer buffer.
666 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
667 for (i = 0; i < xfr->actual_length; i++) {
668 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb; a failure is reported on stderr.
678 int rc = libusb_submit_transfer(xfr);
680 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Body of the USB event thread. Requests SCHED_RR scheduling at priority 1
// for the calling thread (a failure is only reported, not fatal in the
// visible code), then calls libusb_handle_events() in a loop until
// should_quit is set.
685 void BMUSBCapture::usb_thread_func()
// NOTE(review): "¶m" on the next line and in the sched_setscheduler call
// looks like a mis-encoded "&param" — confirm against the original file.
688 memset(¶m, 0, sizeof(param));
689 param.sched_priority = 1;
690 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
691 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
693 while (!should_quit) {
694 int rc = libusb_handle_events(nullptr);
695 if (rc != LIBUSB_SUCCESS)
// One candidate capture card found during USB enumeration. Fields other than
// `device` are declared on lines not visible in this excerpt.
700 struct USBCardDevice {
// libusb device for this card.
703 libusb_device *device;
// Enumerates USB devices, keeps those with vendor 0x1edb and product 0xbd3b
// or 0xbd4f, sorts them into a stable order, lists them on stderr, and opens
// the one at position card_index.
//   card_index:  index into the sorted list of matching cards.
//   description: output string; where it is filled in is on lines not
//                visible in this excerpt.
// The return statements and most failure paths are also not visible here.
706 libusb_device_handle *open_card(int card_index, string *description)
708 libusb_device **devices;
709 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
710 if (num_devices == -1) {
711 fprintf(stderr, "Error finding USB devices\n");
714 vector<USBCardDevice> found_cards;
715 for (ssize_t i = 0; i < num_devices; ++i) {
716 libusb_device_descriptor desc;
717 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
718 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
722 uint8_t bus = libusb_get_bus_number(devices[i]);
723 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are neither product are unreferenced here.
725 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
726 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
727 libusb_unref_device(devices[i]);
731 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// unref_devices is 0: the list is freed without unreferencing the devices, so
// the pointers stored in found_cards stay usable.
733 libusb_free_device_list(devices, 0);
735 // Sort the devices to get a consistent ordering.
// Ordered by product, then bus, then port (one condition line is not visible).
736 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
737 if (a.product != b.product)
738 return a.product < b.product;
740 return a.bus < b.bus;
741 return a.port < b.port;
// Print one line per card found.
744 for (size_t i = 0; i < found_cards.size(); ++i) {
745 const char *product_name = nullptr;
746 if (found_cards[i].product == 0xbd3b) {
747 product_name = "Intensity Shuttle";
748 } else if (found_cards[i].product == 0xbd4f) {
749 product_name = "UltraStudio SDI";
// Note that the value printed under "Device" is the port number.
755 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
756 int(i), found_cards[i].bus, found_cards[i].port, product_name);
757 if (i == size_t(card_index)) {
760 fprintf(stderr, "%s\n", buf);
763 if (size_t(card_index) >= found_cards.size()) {
764 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
768 libusb_device_handle *devh;
769 int rc = libusb_open(found_cards[card_index].device, &devh);
771 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references held on every matching device.
775 for (size_t i = 0; i < found_cards.size(); ++i) {
776 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card. In the order visible here: installs
// default frame allocators if none were set, starts the dequeue thread,
// initializes libusb, opens the card, prints its interface layout, claims
// interfaces and switches alternate settings, issues a series of vendor
// control requests, prepares asynchronous control transfers, and allocates
// the isochronous transfers for endpoints 3 and 4 into iso_xfrs.
// Error checks after most libusb calls have their conditions and exits on
// lines not visible in this excerpt.
782 void BMUSBCapture::configure_card()
784 if (video_frame_allocator == nullptr) {
785 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
787 if (audio_frame_allocator == nullptr) {
788 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// The dequeue thread is started before any USB setup happens.
790 dequeue_thread_should_quit = false;
791 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
794 struct libusb_transfer *xfr;
796 rc = libusb_init(nullptr);
798 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
802 devh = open_card(card_index, &description);
804 fprintf(stderr, "Error finding USB device\n");
808 libusb_config_descriptor *config;
809 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
811 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of interfaces, alternate settings and endpoints.
816 printf("%d interface\n", config->bNumInterfaces);
817 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
818 printf(" interface %d\n", interface_number);
819 const libusb_interface *interface = &config->interface[interface_number];
820 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
821 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
822 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
823 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
824 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
825 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
831 rc = libusb_set_configuration(devh, /*configuration=*/1);
833 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
837 rc = libusb_claim_interface(devh, 0);
839 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
843 // Alternate setting 1 is output, alternate setting 2 is input.
844 // Card is reset when switching alternates, so the driver uses
845 // this “double switch” when it wants to reset.
847 // There's also alternate settings 3 and 4, which seem to be
848 // like 1 and 2 except they advertise less bandwidth needed.
849 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
851 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
854 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
856 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
860 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
862 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
868 rc = libusb_claim_interface(devh, 3);
870 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers follow.
876 // 44 is some kind of timer register (first 16 bits count upwards)
877 // 24 is some sort of watchdog?
878 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
879 // (or will go to 0x73c60010?), also seen 0x73c60100
880 // 12 also changes all the time, unclear why
881 // 16 seems to be autodetected mode somehow
882 // -- this is e00115e0 after reset?
883 // ed0115e0 after mode change [to output?]
884 // 2d0015e0 after more mode change [to input]
885 // ed0115e0 after more mode change
886 // 2d0015e0 after more mode change
888 // 390115e0 seems to indicate we have signal
889 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
891 // 200015e0 on startup
892 // changes to 250115e0 when we sync to the signal
894 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
896 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
898 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
899 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
901 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
902 // perhaps some of them are related to analog output?
904 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
905 // but the driver sets it to 0x8036802a at some point.
907 // all of this is on request 214/215. other requests (192, 219,
908 // 222, 223, 224) are used for firmware upgrade. Probably best to
909 // stay out of it unless you know what you're doing.
913 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
916 // 0x01 - stable signal
918 // 0x08 - unknown (audio??)
922 update_capture_mode();
// Table of synchronous vendor control requests sent at startup.
// Columns as used below: endpoint direction, request, index, data.
930 static const ctrl ctrls[] = {
931 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
932 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
934 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
935 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
936 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
937 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
940 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is converted to network byte order before it is sent.
941 uint32_t flipped = htonl(ctrls[req].data);
942 static uint8_t value[4];
943 memcpy(value, &flipped, sizeof(flipped));
944 int size = sizeof(value);
945 //if (ctrls[req].request == 215) size = 0;
// timeout=0 means no timeout in libusb.
946 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
947 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
949 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte read of register 16 is reported as the firmware version.
953 if (ctrls[req].index == 16 && rc == 4) {
954 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
958 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
959 for (int i = 0; i < rc; ++i) {
960 printf("%02x", value[i]);
// Synchronous read of a single register selected by my_index; the code that
// surrounds or repeats this read is not visible in this excerpt.
969 static int my_index = 0;
970 static uint8_t value[4];
971 int size = sizeof(value);
972 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
973 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
975 fprintf(stderr, "Error on control\n");
978 printf("rc=%d index=%d: 0x", rc, my_index);
979 for (int i = 0; i < rc; ++i) {
980 printf("%02x", value[i]);
987 // set up an asynchronous transfer of the timer register
// The setup buffer is static, so it stays valid after this function returns.
988 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
989 static int completed = 0;
991 xfr = libusb_alloc_transfer(0);
992 libusb_fill_control_setup(cmdbuf,
993 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
994 /*index=*/44, /*length=*/4);
995 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Replaces the &completed pointer set by the fill call just above.
996 xfr->user_data = this;
997 libusb_submit_transfer(xfr);
999 // set up an asynchronous transfer of register 24
1000 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1001 static int completed2 = 0;
1003 xfr = libusb_alloc_transfer(0);
1004 libusb_fill_control_setup(cmdbuf2,
1005 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1006 /*index=*/24, /*length=*/4);
1007 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1008 xfr->user_data = this;
1009 libusb_submit_transfer(xfr);
1012 // set up an asynchronous transfer of the register dump
1013 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1014 static int completed3 = 0;
1016 xfr = libusb_alloc_transfer(0);
1017 libusb_fill_control_setup(cmdbuf3,
1018 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1019 /*index=*/current_register, /*length=*/4);
1020 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1021 xfr->user_data = this;
// This transfer is prepared but its submit call is commented out.
1022 //libusb_submit_transfer(xfr);
1024 //audiofp = fopen("audio.raw", "wb");
1026 // set up isochronous transfers for audio and video
// e is the endpoint number; OR-ed with LIBUSB_ENDPOINT_IN further down.
1027 for (int e = 3; e <= 4; ++e) {
1028 //int num_transfers = (e == 3) ? 6 : 6;
1029 int num_transfers = 10;
1030 for (int i = 0; i < num_transfers; ++i) {
1032 int num_iso_pack, size;
1034 // Allocate for minimum width (because that will give us the most
1035 // number of packets, so we don't need to reallocate), but we'll
1036 // default to 720p for the first frame.
1037 size = find_xfer_size_for_width(MIN_WIDTH);
1038 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1039 buf_size = USB_VIDEO_TRANSFER_SIZE;
// The other branch (its header and size values are not visible here) sizes
// the buffer exactly to the packets.
1043 buf_size = num_iso_pack * size;
1045 assert(size_t(num_iso_pack * size) <= buf_size);
// Raw buffer handed to libusb; no matching delete is visible in this excerpt.
1046 uint8_t *buf = new uint8_t[buf_size];
1048 xfr = libusb_alloc_transfer(num_iso_pack);
1050 fprintf(stderr, "oom\n");
1054 int ep = LIBUSB_ENDPOINT_IN | e;
1055 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1056 num_iso_pack, cb_xfr, nullptr, 0);
1057 libusb_set_iso_packet_lengths(xfr, size);
1058 xfr->user_data = this;
// Re-slice the freshly built transfer for the currently assumed width.
1061 change_xfer_size_for_width(assumed_frame_width, xfr);
// Transfers are only collected here; start_bm_capture() submits them.
1064 iso_xfrs.push_back(xfr);
// Submits every transfer collected in iso_xfrs to libusb and reports any
// submission failure on stderr. The conditions under which the interface
// release and libusb_exit() at the bottom run are on lines not visible in
// this excerpt.
1069 void BMUSBCapture::start_bm_capture()
1072 for (libusb_transfer *xfr : iso_xfrs) {
1073 int rc = libusb_submit_transfer(xfr);
1076 //printf("num_bytes=%d\n", num_bytes);
1077 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1078 xfr->endpoint, i, libusb_error_name(rc));
1085 libusb_release_interface(devh, 0);
1089 libusb_exit(nullptr);
// Shuts down the dequeue thread: raises dequeue_thread_should_quit, calls
// notify_all() on queues_not_empty, and then blocks until dequeue_thread
// has been joined.
1094 void BMUSBCapture::stop_dequeue_thread()
1096 dequeue_thread_should_quit = true;
// Presumably wakes a thread blocked on queues_not_empty so that it can
// re-check the quit flag -- confirm against the dequeue loop.
1097 queues_not_empty.notify_all();
1098 dequeue_thread.join();
// Resets the file-level should_quit flag and launches usb_thread running
// BMUSBCapture::usb_thread_func.
// NOTE(review): no object is bound to the member pointer, so
// usb_thread_func is presumably a static member -- confirm.
1101 void BMUSBCapture::start_bm_thread()
1103 should_quit = false;
1104 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart to start_bm_thread().
// NOTE(review): presumably sets should_quit and joins usb_thread -- confirm
// against the body.
1107 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table in decode_video_format(): associates a
// normalized format code with the geometry and frame rate reported for it.
1113 struct VideoFormatEntry {
1114 uint16_t normalized_video_format;  // Key: the format word after flag bits are masked off.
1115 unsigned width, height, second_field_start;
1116 unsigned extra_lines_top, extra_lines_bottom;
1117 unsigned frame_rate_nom, frame_rate_den;  // Frame rate as the fraction nom/den (e.g. 60000/1001).
// Translates the raw 16-bit format word reported by the card into the fields
// of *decoded_video_format (id, dimensions, extra lines, second field start,
// frame rate as frame_rate_nom/frame_rate_den, interlacing and has_signal).
//
// Cases, in the order they are checked:
//   1. 0x0800: no signal; described as a 720x525 pseudo-frame at 30.13 Hz.
//   2. Words that do not have all of the 0xe800 bits set: not a video
//      format; zero dimensions, 60 Hz assumed, has_signal = false.
//   3. NTSC and PAL, matched on the raw word before any normalization.
//   4. Everything else: flag bits are masked off and the result is looked up
//      in the entries table.
//   5. No table match: a warning is printed and 720p60 is assumed.
//
// NOTE(review): the bool result is presumably true for recognized formats
// and false for the "Assuming ..." fallbacks -- confirm against the return
// statements.
1121 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
1123 decoded_video_format->id = video_format;
1124 decoded_video_format->interlaced = false;
1126 // TODO: Add these for all formats as we find them.
1127 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1129 if (video_format == 0x0800) {
1130 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1131 // It's a strange thing, but what can you do.
1132 decoded_video_format->width = 720;
1133 decoded_video_format->height = 525;
1134 decoded_video_format->extra_lines_top = 0;  // Already zeroed above; repeated explicitly.
1135 decoded_video_format->extra_lines_bottom = 0;
1136 decoded_video_format->frame_rate_nom = 3013;  // 3013/100 = 30.13 Hz.
1137 decoded_video_format->frame_rate_den = 100;
1138 decoded_video_format->has_signal = false;
// Every format handled below has all of the 0xe800 bits set; anything else is
// reported and given placeholder values.
1141 if ((video_format & 0xe800) != 0xe800) {
1142 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1144 decoded_video_format->width = 0;
1145 decoded_video_format->height = 0;
1146 decoded_video_format->extra_lines_top = 0;
1147 decoded_video_format->extra_lines_bottom = 0;
1148 decoded_video_format->frame_rate_nom = 60;
1149 decoded_video_format->frame_rate_den = 1;
1150 decoded_video_format->has_signal = false;
1154 decoded_video_format->has_signal = true;
1156 // NTSC (480i59.94, I suppose). A special case, see below.
1157 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1158 decoded_video_format->width = 720;
1159 decoded_video_format->height = 480;
1160 decoded_video_format->extra_lines_top = 17;
1161 decoded_video_format->extra_lines_bottom = 28;
1162 decoded_video_format->frame_rate_nom = 30000;
1163 decoded_video_format->frame_rate_den = 1001;
1164 decoded_video_format->second_field_start = 280;
1165 decoded_video_format->interlaced = true;
1169 // PAL (576i50, I suppose). A special case, see below.
1170 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1171 decoded_video_format->width = 720;
1172 decoded_video_format->height = 576;
1173 decoded_video_format->extra_lines_top = 22;
1174 decoded_video_format->extra_lines_bottom = 27;
1175 decoded_video_format->frame_rate_nom = 25;
1176 decoded_video_format->frame_rate_den = 1;
1177 decoded_video_format->second_field_start = 335;
1178 decoded_video_format->interlaced = true;
1182 // 0x8 seems to be a flag about availability of deep color on the input,
1183 // except when it's not (e.g. it's the only difference between NTSC
1184 // and PAL). Rather confusing. But we clear it here nevertheless, because
1185 // usually it doesn't mean anything.
1187 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
1188 uint16_t normalized_video_format = video_format & ~0xe80c;
// The mask keeps only bits 0x17f3 of the raw word.
// NOTE(review): because bits 0x0004 and 0x0008 are cleared, table keys that
// have either bit set (0x0125 and 0x008e below) can never compare equal to
// normalized_video_format -- confirm the intended keys.
1189 constexpr VideoFormatEntry entries[] = {
1190 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
1191 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
1192 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
1193 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1194 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1195 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1196 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1197 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1198 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
1199 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1200 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
1201 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
1202 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50. NOTE(review): flagged interlaced at 25/1, which looks like 1080i50 -- confirm.
1203 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1204 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
// Linear search for the normalized code; a match copies the whole row into
// the output.
1206 for (const VideoFormatEntry &entry : entries) {
1207 if (normalized_video_format == entry.normalized_video_format) {
1208 decoded_video_format->width = entry.width;
1209 decoded_video_format->height = entry.height;
1210 decoded_video_format->second_field_start = entry.second_field_start;
1211 decoded_video_format->extra_lines_top = entry.extra_lines_top;
1212 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1213 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1214 decoded_video_format->frame_rate_den = entry.frame_rate_den;
1215 decoded_video_format->interlaced = entry.interlaced;
// Fallback for unrecognized codes. Only size and frame rate are set here; the
// extra lines, second_field_start and interlaced keep the defaults assigned
// at the top, and has_signal keeps the value true assigned above.
1220 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1221 decoded_video_format->width = 1280;
1222 decoded_video_format->height = 720;
1223 decoded_video_format->frame_rate_nom = 60;
1224 decoded_video_format->frame_rate_den = 1;
// Returns the selectable video modes: a single entry with id 0, named
// "Autodetect" and flagged as autodetect.
1228 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1230 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1231 VideoMode auto_mode;
1232 auto_mode.name = "Autodetect";
1233 auto_mode.autodetect = true;
1234 return {{ 0, auto_mode }};
// Always reports mode id 0, the only id offered by
// get_available_video_modes().
1237 uint32_t BMUSBCapture::get_current_video_mode() const
1239 return 0; // Matches get_available_video_modes().
// Accepts only mode id 0 (the autodetect entry); any other id trips the
// assertion. No state is changed by the visible code.
1242 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1244 assert(video_mode_id == 0); // Matches get_available_video_modes().
// Lists the selectable video inputs by id and display name. Every id uses
// only bits within 0x06000000, the mask that set_video_input() asserts on.
1247 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1250 { 0x00000000, "HDMI/SDI" },
1251 { 0x02000000, "Component" },
1252 { 0x04000000, "Composite" },
1253 { 0x06000000, "S-video" }
// Selects the video input. The id may only contain bits within 0x06000000
// (asserted); it is stored in current_video_input and then applied through
// update_capture_mode().
1257 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1259 assert((video_input_id & ~0x06000000) == 0);
1260 current_video_input = video_input_id;
1261 update_capture_mode();
// Lists the selectable audio inputs by id and display name. Every id uses
// only the 0x10000000 bit, the mask that set_audio_input() asserts on.
1264 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1267 { 0x00000000, "Embedded" },
1268 { 0x10000000, "Analog" }
// Selects the audio input. The id may only contain the 0x10000000 bit
// (asserted); it is stored in current_audio_input and then applied through
// update_capture_mode().
1272 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1274 assert((audio_input_id & ~0x10000000) == 0);
1275 current_audio_input = audio_input_id;
1276 update_capture_mode();
// Pushes the current input selection to the card: ORs current_video_input
// and current_audio_input into the base word 0x29000000, converts the result
// to network byte order with htonl(), and sends those sizeof(mode) bytes in a
// vendor-specific OUT control transfer (request 215). A failure is reported
// on stderr with the libusb error name.
1279 void BMUSBCapture::update_capture_mode()
1281 // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
1282 // clearing the 0x08000000 bit seems to change the capture format (other source?)
1283 uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
// libusb_control_transfer() treats a timeout of 0 as "no timeout".
1285 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1286 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1288 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));