1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
35 using namespace std::placeholders;
// Number of leading bytes skipped in a video frame buffer before the payload
// (used by dump_frame() and passed as the video offset to frame_callback).
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
// Same, for audio blocks (see dump_audio_block() and frame_callback).
40 #define AUDIO_HEADER_SIZE 4
// Buffer size handed to the default video MallocFrameAllocator in configure_card().
42 #define FRAME_SIZE (8 << 20) // 8 MB.
// Buffer size used when setting up isochronous transfers in configure_card().
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Polled by the loop in usb_thread_func(); cleared by start_bm_thread().
50 atomic<bool> should_quit;
// Computes the isochronous packet size to use for a given frame width,
// starting from width * 2 * 6 bytes.
// NOTE(review): the lines that adjust a non-multiple-of-1024 size and return
// the result are not visible in this listing; per the comment below the value
// is rounded up to a 1 kB multiple -- confirm against the full file.
52 int find_xfer_size_for_width(int width)
54 // Video seems to require isochronous packets scaled with the width;
55 // seemingly six lines is about right, rounded up to the required 1kB
57 int size = width * 2 * 6;
58 // Note that for 10-bit input, you'll need to increase size accordingly.
59 //size = size * 4 / 3;
// Only sizes that are not already a multiple of 1024 need adjusting.
60 if (size % 1024 != 0) {
// Re-slices an existing transfer for a new frame width: recomputes the packet
// size for `width` and how many such packets fit in xfr->length, and updates
// the transfer only when either value differs from its current setup.
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
69 assert(width >= MIN_WIDTH);
70 size_t size = find_xfer_size_for_width(width);
// Integer division: any remainder of the buffer stays unused.
71 int num_iso_pack = xfr->length / size;
72 if (num_iso_pack != xfr->num_iso_packets ||
73 size != xfr->iso_packet_desc[0].length) {
74 xfr->num_iso_packets = num_iso_pack;
75 libusb_set_iso_packet_lengths(xfr, size);
// Out-of-line definition of the (empty) FrameAllocator destructor.
81 FrameAllocator::~FrameAllocator() {}
83 // Audio is more important than video, and also much cheaper.
84 // By having many more audio frames available, hopefully if something
85 // starts to drop, we'll have CPU load go down (from not having to
86 // process as much video) before we have to drop audio.
// Pool sizes passed to the default MallocFrameAllocator instances created in
// configure_card().
87 #define NUM_QUEUED_VIDEO_FRAMES 16
88 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator that hands out buffers from a freelist filled in its
// constructor: alloc_frame() takes a buffer off the freelist and
// release_frame() puts it back.
// NOTE(review): access specifiers and some members used by the member
// functions (frame_size, freelist_mutex) are not visible in this listing.
90 class MallocFrameAllocator : public FrameAllocator {
// Creates num_queued_frames buffers of frame_size bytes each.
92 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
93 Frame alloc_frame() override;
94 void release_frame(Frame frame) override;
100 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers frame_size and fills the freelist up front with
// num_queued_frames heap buffers of that size.
103 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
104 : frame_size(frame_size)
106 for (size_t i = 0; i < num_queued_frames; ++i) {
107 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist, under freelist_mutex. When the
// freelist is empty an overrun message is printed.
// NOTE(review): the declaration of `vf`, the printf argument and the return
// are not visible in this listing. If the printf argument is a size_t, %zu
// would be the matching conversion rather than %ld -- confirm.
111 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
116 unique_lock<mutex> lock(freelist_mutex); // Meh.
117 if (freelist.empty()) {
118 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Take ownership of the buffer away from the unique_ptr before popping it,
// so that pop() does not free it.
121 vf.data = freelist.top().release();
122 vf.size = frame_size;
123 freelist.pop(); // Meh.
// Puts a frame's buffer back on the freelist, first reporting any overflow
// byte count recorded on the frame.
// NOTE(review): no null check on frame.data is visible; releasing a frame
// whose data is nullptr would push an empty entry onto the freelist --
// confirm callers never do that.
128 void MallocFrameAllocator::release_frame(Frame frame)
130 if (frame.overflow > 0) {
131 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
133 unique_lock<mutex> lock(freelist_mutex);
// Re-wrap the raw pointer so the freelist owns the buffer again.
134 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around: compares the forward
// distance from a to b against 0x8000 (half the counter range).
// NOTE(review): the conditions selecting between the two returns below are
// not visible in this listing.
137 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
// Direct distance from a to b.
142 return (b - a < 0x8000);
// Distance with b taken one full wrap (0x10000) further on.
144 int wrap_b = 0x10000 + int(b);
145 return (wrap_b - a < 0x8000);
// Appends a frame to queue `q` under queue_lock and notifies the
// queues_not_empty condition variable.
// A frame whose timecode does not come after the newest queued entry
// (wraparound-aware comparison) is reported and released to its owner.
// NOTE(review): the declaration of `qf`, its other field assignments, and
// whatever follows the release (presumably an early return) are not visible
// in this listing.
149 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
151 unique_lock<mutex> lock(queue_lock);
152 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
153 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
154 q->back().timecode, timecode);
155 frame.owner->release_frame(frame);
161 qf.timecode = timecode;
163 q->push_back(move(qf));
164 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes the part of a frame that follows the
// HEADER_SIZE-byte header to `filename`, reporting a short write.
// NOTE(review): the fopen() result is passed straight to fwrite() without a
// null check, and frame_len < HEADER_SIZE would wrap the unsigned length --
// confirm callers.
167 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
169 FILE *fp = fopen(filename, "wb");
170 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
171 printf("short write!\n");
// Debug helper: appends the part of an audio block that follows the
// AUDIO_HEADER_SIZE-byte header to the audiofp stream (opened as "audio.raw"
// in configure_card()). The fwrite() result is not checked.
176 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
178 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Waits until both a video and an audio frame are
// pending (or shutdown is requested), then compares the timecodes at the head
// of the two queues:
//  - video before audio: the video frame is dropped and released;
//  - audio before video: the audio frame is delivered with an empty video frame;
//  - remaining case: both heads are popped and delivered together.
// The optional dequeue init/cleanup callbacks bracket the loop.
// NOTE(review): several short lines (branch headers, printf arguments, any
// unlock before the callbacks) are not visible in this listing.
181 void BMUSBCapture::dequeue_thread_func()
183 if (has_dequeue_callbacks) {
184 dequeue_init_callback();
186 while (!dequeue_thread_should_quit) {
187 unique_lock<mutex> lock(queue_lock);
// Predicate form of wait(): also returns when shutdown has been requested.
188 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
190 if (dequeue_thread_should_quit) break;
192 uint16_t video_timecode = pending_video_frames.front().timecode;
193 uint16_t audio_timecode = pending_audio_frames.front().timecode;
194 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
195 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
197 QueuedFrame video_frame = pending_video_frames.front();
198 pending_video_frames.pop_front();
200 video_frame_allocator->release_frame(video_frame.frame);
201 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
202 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
204 QueuedFrame audio_frame = pending_audio_frames.front();
205 pending_audio_frames.pop_front();
// Default-constructed Frame, offset 0 and format 0x0000 stand in for the
// missing video.
207 frame_callback(audio_timecode,
208 FrameAllocator::Frame(), 0, 0x0000,
209 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
211 QueuedFrame video_frame = pending_video_frames.front();
212 QueuedFrame audio_frame = pending_audio_frames.front();
213 pending_audio_frames.pop_front();
214 pending_video_frames.pop_front();
// NOTE(review): presumably a compiled-out debug dump; the surrounding lines
// (declaration of `filename`, any #if) are not visible here.
219 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
220 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
221 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
224 frame_callback(video_timecode,
225 video_frame.frame, HEADER_SIZE, video_frame.format,
226 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
229 if (has_dequeue_callbacks) {
230 dequeue_cleanup_callback();
// Invoked by decode_packs() at each video sync marker, with `start` pointing
// at the first byte after the marker.
// Reads the timecode (bytes 0-1) and format (bytes 2-3), both little-endian.
// If the frame accumulated so far holds any data, it is queued with those
// values and assumed_frame_width is updated from the decoded format. A fresh
// video frame is then allocated.
// NOTE(review): some short lines (closing braces, a possible early return
// after the no-signal drop, the declaration of `interlaced`) are not visible
// in this listing.
234 void BMUSBCapture::start_new_frame(const uint8_t *start)
236 uint16_t format = (start[3] << 8) | start[2];
237 uint16_t timecode = (start[1] << 8) | start[0];
239 if (current_video_frame.len > 0) {
240 // If format is 0x0800 (no signal), add a fake (empty) audio
241 // frame to get it out of the queue.
242 // TODO: Figure out if there are other formats that come with
243 // no audio, and treat them the same.
244 if (format == 0x0800) {
245 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
246 if (fake_audio_frame.data == nullptr) {
247 // Oh well, it's just a no-signal frame anyway.
248 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
249 current_video_frame.owner->release_frame(current_video_frame);
250 current_video_frame = video_frame_allocator->alloc_frame();
253 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
256 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
258 // Update the assumed frame width. We might be one frame too late on format changes,
259 // but it's much better than asking the user to choose manually.
260 unsigned width, height, second_field_start, extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den;
// Only the width is kept; the other outputs are discarded here.
262 if (decode_video_format(format, &width, &height, &second_field_start, &extra_lines_top, &extra_lines_bottom,
263 &frame_rate_nom, &frame_rate_den, &interlaced)) {
264 assumed_frame_width = width;
267 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
269 // //start[7], start[6], start[5], start[4],
270 // read_current_frame, FRAME_SIZE);
272 current_video_frame = video_frame_allocator->alloc_frame();
273 //if (current_video_frame.data == nullptr) {
274 // read_current_frame = -1;
276 // read_current_frame = 0;
// Invoked by decode_packs() at each audio sync marker, with `start` pointing
// at the first byte after the marker.
// Reads the timecode (bytes 0-1) and format (bytes 2-3), both little-endian,
// queues the audio block accumulated so far if it holds any data, and
// allocates a fresh audio frame.
280 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
282 uint16_t format = (start[3] << 8) | start[2];
283 uint16_t timecode = (start[1] << 8) | start[0];
284 if (current_audio_frame.len > 0) {
285 //dump_audio_block();
286 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
288 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
289 // format, timecode, read_current_audio_block);
290 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes (actual_length) of one iso packet
// as two-digit hex, reading from xfr->buffer starting at `offset`.
// NOTE(review): the separator logic between bytes is only partly visible in
// this listing.
294 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
296 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
297 for (unsigned j = 0; j < pack->actual_length; j++) {
298 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
299 printf("%02x", xfr->buffer[j + offset]);
302 else if ((j % 8) == 7)
// Copies n bytes from src into two destinations, consuming two source bytes
// per loop iteration.
// NOTE(review): the loop body is not visible in this listing; presumably
// alternate source bytes go to dest1 and dest2 -- confirm, and check how an
// odd n is treated.
310 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
313 uint8_t *dptr1 = dest1;
314 uint8_t *dptr2 = dest2;
316 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to current_frame and advances its len.
// Non-interleaved frames get a straight memcpy into data; interleaved frames
// are split across data and data2 via memcpy_interleaved().
// If the bytes would not fit, the excess is recorded in overflow and len is
// pinned to size; an overflow above 1 MB is printed and reset to 0.
// NOTE(review): the rest of the initial guard condition, the handling of odd
// lengths in the interleaved path, and the exact branch structure are not
// fully visible in this listing.
322 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
324 if (current_frame->data == nullptr ||
325 current_frame->len > current_frame->size ||
330 int bytes = end - start;
331 if (current_frame->len + bytes > current_frame->size) {
332 current_frame->overflow = current_frame->len + bytes - current_frame->size;
333 current_frame->len = current_frame->size;
334 if (current_frame->overflow > 1048576) {
335 printf("%d bytes overflow after last %s frame\n",
336 int(current_frame->overflow), frame_type_name);
337 current_frame->overflow = 0;
341 if (current_frame->interleaved) {
// Each plane holds half of the bytes written so far.
342 uint8_t *data = current_frame->data + current_frame->len / 2;
343 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
344 if (current_frame->len % 2 == 1) {
348 if (bytes % 2 == 1) {
351 ++current_frame->len;
354 memcpy_interleaved(data, data2, start, bytes);
355 current_frame->len += bytes;
357 memcpy(current_frame->data + current_frame->len, start, bytes);
358 current_frame->len += bytes;
// Debug helper: prints `name` (left-aligned, padded to 10 columns) followed
// by all 32 bytes of `n` as two-digit hex, byte 0 first.
// The extraction is written out call by call, with a literal index each time.
366 void avx2_dump(const char *name, __m256i n)
368 printf("%-10s:", name);
369 printf(" %02x", _mm256_extract_epi8(n, 0));
370 printf(" %02x", _mm256_extract_epi8(n, 1));
371 printf(" %02x", _mm256_extract_epi8(n, 2));
372 printf(" %02x", _mm256_extract_epi8(n, 3));
373 printf(" %02x", _mm256_extract_epi8(n, 4));
374 printf(" %02x", _mm256_extract_epi8(n, 5));
375 printf(" %02x", _mm256_extract_epi8(n, 6));
376 printf(" %02x", _mm256_extract_epi8(n, 7));
378 printf(" %02x", _mm256_extract_epi8(n, 8));
379 printf(" %02x", _mm256_extract_epi8(n, 9));
380 printf(" %02x", _mm256_extract_epi8(n, 10));
381 printf(" %02x", _mm256_extract_epi8(n, 11));
382 printf(" %02x", _mm256_extract_epi8(n, 12));
383 printf(" %02x", _mm256_extract_epi8(n, 13));
384 printf(" %02x", _mm256_extract_epi8(n, 14));
385 printf(" %02x", _mm256_extract_epi8(n, 15));
387 printf(" %02x", _mm256_extract_epi8(n, 16));
388 printf(" %02x", _mm256_extract_epi8(n, 17));
389 printf(" %02x", _mm256_extract_epi8(n, 18));
390 printf(" %02x", _mm256_extract_epi8(n, 19));
391 printf(" %02x", _mm256_extract_epi8(n, 20));
392 printf(" %02x", _mm256_extract_epi8(n, 21));
393 printf(" %02x", _mm256_extract_epi8(n, 22));
394 printf(" %02x", _mm256_extract_epi8(n, 23));
396 printf(" %02x", _mm256_extract_epi8(n, 24));
397 printf(" %02x", _mm256_extract_epi8(n, 25));
398 printf(" %02x", _mm256_extract_epi8(n, 26));
399 printf(" %02x", _mm256_extract_epi8(n, 27));
400 printf(" %02x", _mm256_extract_epi8(n, 28));
401 printf(" %02x", _mm256_extract_epi8(n, 29));
402 printf(" %02x", _mm256_extract_epi8(n, 30));
403 printf(" %02x", _mm256_extract_epi8(n, 31));
408 // Does a memcpy and memchr in one to reduce processing time.
409 // Note that the benefit is somewhat limited if your L3 cache is small,
410 // as you'll (unfortunately) spend most of the time loading the data
413 // Complicated cases are left to the slow path; it basically stops copying
414 // up until the first instance of "sync_char" (usually a bit before, actually).
415 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
416 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Copies from [start, limit) into current_frame in SIMD-sized chunks while
// checking each chunk for sync_char. The visible return hands back the
// position reached in the source; decode_packs() continues from there on the
// slow path.
// NOTE(review): the early-return values and the preprocessor lines that
// presumably select between the 256-bit and 128-bit variants below are not
// visible in this listing.
417 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
419 if (current_frame->data == nullptr ||
420 current_frame->len > current_frame->size ||
424 size_t orig_bytes = limit - start;
// Short inputs take a separate path (body not visible here).
425 if (orig_bytes < 128) {
430 // Don't read more bytes than we can write.
431 limit = min(limit, start + (current_frame->size - current_frame->len));
433 // Align end to 32 bytes.
434 limit = (const uint8_t *)(intptr_t(limit) & ~31);
436 if (start >= limit) {
440 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
441 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
442 if (aligned_start != start) {
// The unaligned head goes through add_to_frame(); if it already contains
// sync_char, only the bytes before it are added.
443 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
444 if (sync_start == nullptr) {
445 add_to_frame(current_frame, "", start, aligned_start);
447 add_to_frame(current_frame, "", start, sync_start);
452 // Make the length a multiple of 64.
453 if (current_frame->interleaved) {
454 if (((limit - aligned_start) % 64) != 0) {
457 assert(((limit - aligned_start) % 64) == 0);
// 256-bit variant.
461 const __m256i needle = _mm256_set1_epi8(sync_char);
463 const __restrict __m256i *in = (const __m256i *)aligned_start;
464 if (current_frame->interleaved) {
465 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
466 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
467 if (current_frame->len % 2 == 1) {
// Byte shuffle that gathers the even-indexed bytes of each 128-bit lane into
// its low half and the odd-indexed bytes into its high half.
471 __m256i shuffle_cw = _mm256_set_epi8(
472 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
473 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
474 while (in < (const __m256i *)limit) {
475 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
476 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
477 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
479 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
480 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
481 __m256i found = _mm256_or_si256(found1, found2);
483 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
484 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
486 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
487 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
489 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
490 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
492 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
493 _mm256_storeu_si256(out2, hi);
// Nonzero `found` means sync_char occurred somewhere in these 64 bytes.
495 if (!_mm256_testz_si256(found, found)) {
503 current_frame->len += (uint8_t *)in - aligned_start;
505 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
506 while (in < (const __m256i *)limit) {
507 __m256i data = _mm256_load_si256(in);
508 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
509 __m256i found = _mm256_cmpeq_epi8(data, needle);
510 if (!_mm256_testz_si256(found, found)) {
517 current_frame->len = (uint8_t *)out - current_frame->data;
// 128-bit variant of the same two loops.
520 const __m128i needle = _mm_set1_epi8(sync_char);
522 const __m128i *in = (const __m128i *)aligned_start;
523 if (current_frame->interleaved) {
524 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
525 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
526 if (current_frame->len % 2 == 1) {
// Mask and shift isolate the low and high byte of every 16-bit word; packus
// then narrows each set back to bytes.
530 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
531 while (in < (const __m128i *)limit) {
532 __m128i data1 = _mm_load_si128(in);
533 __m128i data2 = _mm_load_si128(in + 1);
534 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
535 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
536 __m128i data1_hi = _mm_srli_epi16(data1, 8);
537 __m128i data2_hi = _mm_srli_epi16(data2, 8);
538 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
539 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
540 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
541 _mm_storeu_si128(out2, hi);
542 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
543 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
544 if (!_mm_testz_si128(found1, found1) ||
545 !_mm_testz_si128(found2, found2)) {
553 current_frame->len += (uint8_t *)in - aligned_start;
555 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
556 while (in < (const __m128i *)limit) {
557 __m128i data = _mm_load_si128(in);
558 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
559 __m128i found = _mm_cmpeq_epi8(data, needle);
560 if (!_mm_testz_si128(found, found)) {
567 current_frame->len = (uint8_t *)out - current_frame->data;
571 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
573 return (const uint8_t *)in;
// Walks every iso packet of a transfer and feeds its received bytes into
// current_frame. Within a packet, data first goes through
// add_to_frame_fastpath(); what is left is searched for sync_pattern with
// memmem(). Bytes before a marker are appended to the current frame, the
// marker itself is skipped, and start_callback is invoked with a pointer to
// the first byte after it.
// Packets whose status is not LIBUSB_TRANSFER_COMPLETED are reported on
// stderr.
// NOTE(review): one parameter line (sync_length, used below) and the
// declaration of `offset` are not visible in this listing.
577 void decode_packs(const libusb_transfer *xfr,
578 const char *sync_pattern,
580 FrameAllocator::Frame *current_frame,
581 const char *frame_type_name,
582 function<void(const uint8_t *start)> start_callback)
585 for (int i = 0; i < xfr->num_iso_packets; i++) {
586 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
588 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
589 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
594 const uint8_t *start = xfr->buffer + offset;
595 const uint8_t *limit = start + pack->actual_length;
596 while (start < limit) { // Usually runs only one iteration.
// Only the first byte of the pattern is passed to the fast path.
598 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
599 if (start == limit) break;
600 assert(start < limit);
603 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
604 if (start_next_frame == nullptr) {
605 // add the rest of the buffer
606 add_to_frame(current_frame, frame_type_name, start, limit);
609 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
610 start = start_next_frame + sync_length; // skip sync
611 start_callback(start);
615 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer by their nominal length, not by the
// number of bytes actually received.
617 offset += pack->length;
// Completion callback passed to libusb for the isochronous and control
// transfers set up in configure_card(); xfr->user_data carries the
// BMUSBCapture instance.
// A transfer whose status is not LIBUSB_TRANSFER_COMPLETED is reported and
// freed. Isochronous data is decoded as audio on endpoint 0x84 and as video
// in the other branch; the transfer is then re-sliced for the current
// assumed frame width. Control transfers store the four bytes read into
// register_file and set up a read of the next register. The transfer is
// resubmitted at the end.
// NOTE(review): `setup` is used below although its declaration is commented
// out; presumably those lines sit in a compiled-out block not visible in this
// listing -- confirm.
621 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
623 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
624 fprintf(stderr, "transfer status %d\n", xfr->status);
625 libusb_free_transfer(xfr);
629 assert(xfr->user_data != nullptr);
630 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
632 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
633 if (xfr->endpoint == 0x84) {
634 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
636 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
638 // Update the transfer with the new assumed width, if we're in the process of changing formats.
639 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
642 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
643 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
644 uint8_t *buf = libusb_control_transfer_get_data(xfr);
646 if (setup->wIndex == 44) {
647 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
649 printf("read register %2d: 0x%02x%02x%02x%02x\n",
650 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the register just read and step to the next one, wrapping at
// NUM_BMUSB_REGISTERS.
653 memcpy(usb->register_file + usb->current_register, buf, 4);
654 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
655 if (usb->current_register == 0) {
656 // read through all of them
657 printf("register dump:");
658 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
659 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Rewrite the setup packet in place so the transfer reads the next register.
663 libusb_fill_control_setup(xfr->buffer,
664 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
665 /*index=*/usb->current_register, /*length=*/4);
670 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
671 for (i = 0; i < xfr->actual_length; i++) {
672 printf("%02x", xfr->buffer[i]);
682 int rc = libusb_submit_transfer(xfr);
684 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Event loop of the USB thread: requests SCHED_RR scheduling at priority 1
// for the caller (pid 0), printing a message if that fails, then handles
// libusb events until should_quit is set.
// NOTE(review): the declaration of `param` and the body of the rc check are
// not visible in this listing.
689 void BMUSBCapture::usb_thread_func()
// `&param` restored here and in the sched_setscheduler() call; the listing
// had it mis-decoded as "¶m" (an HTML "&para" entity artifact).
692 memset(&param, 0, sizeof(param));
693 param.sched_priority = 1;
694 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
695 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
697 while (!should_quit) {
698 int rc = libusb_handle_events(nullptr);
699 if (rc != LIBUSB_SUCCESS)
// One matching capture card found during enumeration in open_card().
// NOTE(review): the product, bus and port members used by open_card() are
// not visible in this listing.
704 struct USBCardDevice {
// Device reference taken from the libusb device list; open_card() unrefs it
// after opening the selected card.
707 libusb_device *device;
// Enumerates USB devices, keeps those with vendor 0x1edb and product 0xbd3b
// or 0xbd4f, sorts them by product, then bus, then port, prints the resulting
// list to stderr, and opens the card at position card_index.
// NOTE(review): the return statements, the error-path exits and the place
// where *description is filled in are not visible in this listing.
710 libusb_device_handle *open_card(int card_index, string *description)
712 libusb_device **devices;
713 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
714 if (num_devices == -1) {
715 fprintf(stderr, "Error finding USB devices\n");
718 vector<USBCardDevice> found_cards;
719 for (ssize_t i = 0; i < num_devices; ++i) {
720 libusb_device_descriptor desc;
721 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
722 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
726 uint8_t bus = libusb_get_bus_number(devices[i]);
727 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are neither of the two supported products are unreferenced
// right away.
729 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
730 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
731 libusb_unref_device(devices[i]);
735 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free only the list itself; the device references are
// dropped individually (above for rejected devices, at the end for matches).
737 libusb_free_device_list(devices, 0);
739 // Sort the devices to get a consistent ordering.
740 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
741 if (a.product != b.product)
742 return a.product < b.product;
744 return a.bus < b.bus;
745 return a.port < b.port;
748 for (size_t i = 0; i < found_cards.size(); ++i) {
749 const char *product_name = nullptr;
750 if (found_cards[i].product == 0xbd3b) {
751 product_name = "Intensity Shuttle";
752 } else if (found_cards[i].product == 0xbd4f) {
753 product_name = "UltraStudio SDI";
// Note that the value printed after "Device" is the port number.
759 snprintf(buf, sizeof(buf), "Card %d: Bus %03u Device %03u %s",
760 int(i), found_cards[i].bus, found_cards[i].port, product_name);
761 if (i == size_t(card_index)) {
764 fprintf(stderr, "%s\n", buf);
767 if (size_t(card_index) >= found_cards.size()) {
768 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
772 libusb_device_handle *devh;
773 int rc = libusb_open(found_cards[card_index].device, &devh);
775 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references taken by the device list for every matching card.
779 for (size_t i = 0; i < found_cards.size(); ++i) {
780 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card. In order: installs default frame
// allocators if none were set, starts the dequeue thread, initializes libusb,
// opens the card, prints its interface layout, selects configuration 1 and
// claims interfaces, cycles through alternate settings, issues a fixed list
// of vendor control requests, sets up asynchronous control reads, and
// allocates the isochronous transfers collected in iso_xfrs.
// NOTE(review): many short lines (rc checks, exits, closing braces, some
// declarations) are not visible in this listing, so the error handling
// cannot be described from here.
786 void BMUSBCapture::configure_card()
788 if (video_frame_allocator == nullptr) {
789 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
791 if (audio_frame_allocator == nullptr) {
792 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
794 dequeue_thread_should_quit = false;
795 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
798 struct libusb_transfer *xfr;
800 rc = libusb_init(nullptr);
802 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
806 libusb_device_handle *devh = open_card(card_index, &description);
808 fprintf(stderr, "Error finding USB device\n");
812 libusb_config_descriptor *config;
813 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
815 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of interfaces, alternate settings and endpoints.
820 printf("%d interface\n", config->bNumInterfaces);
821 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
822 printf(" interface %d\n", interface_number);
823 const libusb_interface *interface = &config->interface[interface_number];
824 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
825 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
826 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
827 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
828 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
829 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
835 rc = libusb_set_configuration(devh, /*configuration=*/1);
837 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
841 rc = libusb_claim_interface(devh, 0);
843 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
847 // Alternate setting 1 is output, alternate setting 2 is input.
848 // Card is reset when switching alternates, so the driver uses
849 // this “double switch” when it wants to reset.
851 // There's also alternate settings 3 and 4, which seem to be
852 // like 1 and 2 except they advertise less bandwidth needed.
853 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
855 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
858 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
860 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
864 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
866 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
872 rc = libusb_claim_interface(devh, 3);
874 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
880 // 44 is some kind of timer register (first 16 bits count upwards)
881 // 24 is some sort of watchdog?
882 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
883 // (or will go to 0x73c60010?), also seen 0x73c60100
884 // 12 also changes all the time, unclear why
885 // 16 seems to be autodetected mode somehow
886 // -- this is e00115e0 after reset?
887 // ed0115e0 after mode change [to output?]
888 // 2d0015e0 after more mode change [to input]
889 // ed0115e0 after more mode change
890 // 2d0015e0 after more mode change
892 // 390115e0 seems to indicate we have signal
893 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
895 // 200015e0 on startup
896 // changes to 250115e0 when we sync to the signal
898 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
900 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
902 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
903 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
905 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
906 // perhaps some of them are related to analog output?
908 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
909 // but the driver sets it to 0x8036802a at some point.
911 // all of this is on request 214/215. other requests (192, 219,
912 // 222, 223, 224) are used for firmware upgrade. Probably best to
913 // stay out of it unless you know what you're doing.
917 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
920 // 0x01 - stable signal
922 // 0x08 - unknown (audio??)
// Fixed sequence of vendor control requests issued below. Judging by how the
// loop reads the fields, each entry is { endpoint direction, request, index,
// data } (the struct definition is not visible in this listing).
932 static const ctrl ctrls[] = {
933 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
934 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
936 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
938 // clearing the 0x08000000 bit seems to change the capture format (other source?)
939 // 0x10000000 = analog audio instead of embedded audio, it seems
940 // 0x3a000000 = component video? (analog audio)
941 // 0x3c000000 = composite video? (analog audio)
942 // 0x3e000000 = s-video? (analog audio)
943 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
944 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
945 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
946 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
947 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
950 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// htonl() puts the 32-bit value into big-endian byte order for the wire.
951 uint32_t flipped = htonl(ctrls[req].data);
952 static uint8_t value[4];
953 memcpy(value, &flipped, sizeof(flipped));
954 int size = sizeof(value);
955 //if (ctrls[req].request == 215) size = 0;
956 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
957 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
959 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte transfer on register 16 reports the firmware version.
963 if (ctrls[req].index == 16 && rc == 4) {
964 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
968 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
969 for (int i = 0; i < rc; ++i) {
970 printf("%02x", value[i]);
979 static int my_index = 0;
980 static uint8_t value[4];
981 int size = sizeof(value);
982 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
983 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
985 fprintf(stderr, "Error on control\n");
988 printf("rc=%d index=%d: 0x", rc, my_index);
989 for (int i = 0; i < rc; ++i) {
990 printf("%02x", value[i]);
997 // set up an asynchronous transfer of the timer register
998 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
999 static int completed = 0;
1001 xfr = libusb_alloc_transfer(0);
1002 libusb_fill_control_setup(cmdbuf,
1003 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1004 /*index=*/44, /*length=*/4);
1005 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// user_data is overwritten here, so cb_xfr receives `this`, not &completed.
1006 xfr->user_data = this;
1007 libusb_submit_transfer(xfr);
1009 // set up an asynchronous transfer of register 24
1010 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1011 static int completed2 = 0;
1013 xfr = libusb_alloc_transfer(0);
1014 libusb_fill_control_setup(cmdbuf2,
1015 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1016 /*index=*/24, /*length=*/4);
1017 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1018 xfr->user_data = this;
1019 libusb_submit_transfer(xfr);
1022 // set up an asynchronous transfer of the register dump
1023 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1024 static int completed3 = 0;
1026 xfr = libusb_alloc_transfer(0);
1027 libusb_fill_control_setup(cmdbuf3,
1028 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1029 /*index=*/current_register, /*length=*/4);
1030 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1031 xfr->user_data = this;
1032 //libusb_submit_transfer(xfr);
1034 audiofp = fopen("audio.raw", "wb");
1036 // set up isochronous transfers for audio and video
// IN endpoints 3 and 4; cb_xfr() treats endpoint 0x84 as audio.
1037 for (int e = 3; e <= 4; ++e) {
1038 //int num_transfers = (e == 3) ? 6 : 6;
1039 int num_transfers = 10;
1040 for (int i = 0; i < num_transfers; ++i) {
1042 int num_iso_pack, size;
1044 // Allocate for minimum width (because that will give us the highest
1045 // number of packets, so we don't need to reallocate), but we'll
1046 // default to 720p for the first frame.
1047 size = find_xfer_size_for_width(MIN_WIDTH);
1048 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1049 buf_size = USB_VIDEO_TRANSFER_SIZE;
1053 buf_size = num_iso_pack * size;
1055 assert(size_t(num_iso_pack * size) <= buf_size);
1056 uint8_t *buf = new uint8_t[buf_size];
1058 xfr = libusb_alloc_transfer(num_iso_pack);
1060 fprintf(stderr, "oom\n");
1064 int ep = LIBUSB_ENDPOINT_IN | e;
1065 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1066 num_iso_pack, cb_xfr, nullptr, 0);
1067 libusb_set_iso_packet_lengths(xfr, size);
1068 xfr->user_data = this;
1071 change_xfer_size_for_width(assumed_frame_width, xfr);
// Transfers are only collected here; start_bm_capture() submits them.
1074 iso_xfrs.push_back(xfr);
// Starts capturing by handing every isochronous transfer stored in iso_xfrs
// to libusb with libusb_submit_transfer().
// NOTE(review): several lines of this function are not visible in this
// excerpt (the declaration/update of the counter `i`, the check on `rc`
// guarding the error message, and whatever surrounds the teardown calls at
// the end); confirm the details below against the full file.
1079 void BMUSBCapture::start_bm_capture()
1082 for (libusb_transfer *xfr : iso_xfrs) {
1083 int rc = libusb_submit_transfer(xfr);
1086 //printf("num_bytes=%d\n", num_bytes);
// Error report: names the endpoint of the failing transfer, the value of the
// counter `i`, and libusb's symbolic name for the return code.
1087 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1088 xfr->endpoint, i, libusb_error_name(rc));
// Teardown calls: release interface 0 on the device handle and shut down the
// default libusb context (nullptr selects the default context).
// NOTE(review): verify under which conditions, if any, these are reached.
1095 libusb_release_interface(devh, 0);
1099 libusb_exit(nullptr);
// Shuts down the dequeue thread and blocks until it has finished.
1104 void BMUSBCapture::stop_dequeue_thread()
// Raise the quit flag first, then wake everything waiting on
// queues_not_empty -- presumably so that a dequeue thread sleeping on the
// condition variable re-checks the flag (confirm against the thread body).
1106 dequeue_thread_should_quit = true;
1107 queues_not_empty.notify_all();
// Wait for the thread to exit before returning.
1108 dequeue_thread.join();
// Launches the USB thread: clears the global should_quit flag and then
// starts usb_thread running BMUSBCapture::usb_thread_func.
1111 void BMUSBCapture::start_bm_thread()
1113 should_quit = false;
// No object is passed along with the member-function pointer, so
// usb_thread_func is presumably a static member -- confirm in the class
// declaration.
1114 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart to start_bm_thread().
// NOTE(review): the body is not visible in this excerpt; presumably it sets
// should_quit and joins usb_thread -- confirm against the full file.
1117 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table used by decode_video_format(): maps a
// normalized format code to the values that function writes to its output
// parameters.
1123 struct VideoFormatEntry {
// Format code after masking with ~0xe80c; this is the lookup key.
1124 uint16_t normalized_video_format;
// width/height are the picture dimensions (e.g. 1280, 720 for the 720p rows).
// second_field_start is nonzero only in the table rows marked interlaced --
// presumably the line on which the second field begins (TODO confirm).
1125 unsigned width, height, second_field_start;
// Presumably the number of non-picture lines before and after the active
// picture -- confirm against the code consuming these values.
1126 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a rational number, e.g. 60000/1001 for 59.94 Hz.
1127 unsigned frame_rate_nom, frame_rate_den;
// Decodes a 16-bit video format code into picture geometry and frame rate.
// All results are written through the pointer parameters:
//   width, height                        - picture dimensions
//   second_field_start                   - nonzero only for interlaced formats
//   extra_lines_top, extra_lines_bottom  - see VideoFormatEntry
//   frame_rate_nom / frame_rate_den      - frame rate as a fraction
//   interlaced                           - true for interlaced formats
//
// Order of evaluation: a handful of special cases (no signal, codes that do
// not carry the 0xe800 bits, NTSC, PAL) are checked first; everything else
// is normalized by clearing the 0xe80c bits and looked up in a table. Codes
// not found in the table are reported on stdout and get a 60/1 frame rate.
//
// NOTE(review): the return statements and several width/height assignments
// are not visible in this excerpt, so the meaning of the bool return value
// should be confirmed against the full file.
1131 bool decode_video_format(uint16_t video_format, unsigned *width, unsigned *height, unsigned *second_field_start,
1132 unsigned *extra_lines_top, unsigned *extra_lines_bottom,
1133 unsigned *frame_rate_nom, unsigned *frame_rate_den, bool *interlaced)
// Defaults; the branches below override them only where needed.
1135 *interlaced = false;
1137 // TODO: Add these for all formats as we find them.
1138 *extra_lines_top = *extra_lines_bottom = *second_field_start = 0;
1140 if (video_format == 0x0800) {
1141 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1142 // It's a strange thing, but what can you do.
1145 *extra_lines_top = 0;
1146 *extra_lines_bottom = 0;
// 3013/100 = 30.13 Hz, matching the observation above.
1147 *frame_rate_nom = 3013;
1148 *frame_rate_den = 100;
// Every code handled past this point is expected to have all of the 0xe800
// bits set; anything else is logged and given a 60/1 frame rate.
1151 if ((video_format & 0xe800) != 0xe800) {
1152 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1156 *extra_lines_top = 0;
1157 *extra_lines_bottom = 0;
1158 *frame_rate_nom = 60;
1159 *frame_rate_den = 1;
1163 // NTSC (480i59.94, I suppose). A special case, see below.
1164 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1167 *extra_lines_top = 17;
1168 *extra_lines_bottom = 28;
// 30000/1001 = 29.97 frames per second.
1169 *frame_rate_nom = 30000;
1170 *frame_rate_den = 1001;
1171 *second_field_start = 280;
1176 // PAL (576i50, I suppose). A special case, see below.
1177 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9) {
1180 *extra_lines_top = 22;
1181 *extra_lines_bottom = 27;
1182 *frame_rate_nom = 25;
1183 *frame_rate_den = 1;
1184 *second_field_start = 335;
1189 // 0x8 seems to be a flag about availability of deep color on the input,
1190 // except when it's not (e.g. it's the only difference between NTSC
1191 // and PAL). Rather confusing. But we clear it here nevertheless, because
1192 // usually it doesn't mean anything.
1194 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask also strips the 0xe800 bits checked above, so the table keys
// below contain none of the bits in 0xe80c.
1195 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: key, width, height, second_field_start, extra_lines_top,
// extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
1196 constexpr VideoFormatEntry entries[] = {
1197 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
1198 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
1199 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
1200 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1201 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1202 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1203 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1204 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1205 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
1206 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1207 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
1208 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
// NOTE(review): this row is flagged interlaced with a 25/1 frame rate, so it
// describes 1080i50 rather than 1080p50 as it was previously labelled.
// Unlike the other 1080i rows, its second_field_start and extra_lines
// values are all zero -- confirm whether they should be 583, 20, 25.
1209 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080i50.
1210 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1211 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
// Linear scan of the table; on a key match every output is copied from the
// matching row.
1213 for (const VideoFormatEntry &entry : entries) {
1214 if (normalized_video_format == entry.normalized_video_format) {
1215 *width = entry.width;
1216 *height = entry.height;
1217 *second_field_start = entry.second_field_start;
1218 *extra_lines_top = entry.extra_lines_top;
1219 *extra_lines_bottom = entry.extra_lines_bottom;
1220 *frame_rate_nom = entry.frame_rate_nom;
1221 *frame_rate_den = entry.frame_rate_den;
1222 *interlaced = entry.interlaced;
// No table row matched: log both the raw and the normalized code and fall
// back to a 60/1 frame rate.
1227 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1230 *frame_rate_nom = 60;
1231 *frame_rate_den = 1;