1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
35 using namespace std::placeholders;
// Number of leading bytes skipped before the payload of a captured buffer:
// HEADER_SIZE for video (used by dump_frame() and as the video offset given
// to frame_callback), AUDIO_HEADER_SIZE for audio (used by dump_audio_block()
// and as the audio offset given to frame_callback).
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
// Buffer size of the default video frame allocator created in configure_card().
42 #define FRAME_SIZE (8 << 20) // 8 MB.
// Buffer size used when configure_card() sets up the video isochronous transfers.
43 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Loop condition of usb_thread_func(); start_bm_thread() resets it to false.
50 atomic<bool> should_quit;
// Computes the isochronous packet size for a given frame width, starting from
// width * 2 * 6 bytes (six lines' worth, per the comment below).
// NOTE(review): the body of the "not a multiple of 1024" branch and the return
// statement are not visible in this excerpt; the comment below implies the
// value is rounded up to a 1 kB boundary -- confirm.
52 int find_xfer_size_for_width(int width)
54 // Video seems to require isochronous packets scaled with the width;
55 // seemingly six lines is about right, rounded up to the required 1kB
57 int size = width * 2 * 6;
58 // Note that for 10-bit input, you'll need to increase size accordingly.
59 //size = size * 4 / 3;
60 if (size % 1024 != 0) {
// Re-slices an existing isochronous transfer for a new frame width.
// The packet size comes from find_xfer_size_for_width(); the packet count is
// however many such packets fit in the transfer's existing length. The
// transfer is only modified when the packet count or the length of its first
// packet differs from the newly computed values.
// Asserts that width >= MIN_WIDTH.
67 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
69 assert(width >= MIN_WIDTH);
70 size_t size = find_xfer_size_for_width(width);
// Integer division: a tail of the buffer smaller than one packet is not used.
71 int num_iso_pack = xfr->length / size;
72 if (num_iso_pack != xfr->num_iso_packets ||
73 size != xfr->iso_packet_desc[0].length) {
74 xfr->num_iso_packets = num_iso_pack;
75 libusb_set_iso_packet_lengths(xfr, size);
// Out-of-line, empty destructor for the FrameAllocator base class.
81 FrameAllocator::~FrameAllocator() {}
83 // Audio is more important than video, and also much cheaper.
84 // By having many more audio frames available, hopefully if something
85 // starts to drop, we'll have CPU load go down (from not having to
86 // process as much video) before we have to drop audio.
// These are the buffer counts passed to the default MallocFrameAllocator
// instances created in configure_card().
87 #define NUM_QUEUED_VIDEO_FRAMES 16
88 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator implementation backed by a fixed pool of heap buffers that
// are all allocated up front and recycled through a freelist.
90 class MallocFrameAllocator : public FrameAllocator {
// Allocates num_queued_frames buffers of frame_size bytes each.
92 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
// Takes one buffer off the freelist.
93 Frame alloc_frame() override;
// Puts frame.data back on the freelist.
94 void release_frame(Frame frame) override;
100 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers the per-buffer size and fills the freelist with
// num_queued_frames freshly allocated buffers of that size.
103 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
104 : frame_size(frame_size)
106 for (size_t i = 0; i < num_queued_frames; ++i) {
107 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist, recording the buffer capacity in
// the frame's size field. When the freelist is empty, a "Frame overrun"
// message is printed instead.
// NOTE(review): the declaration of vf, its other fields and the return
// statement are not visible in this excerpt.
111 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
// Same mutex as release_frame(); guards every access to the freelist.
116 unique_lock<mutex> lock(freelist_mutex); // Meh.
117 if (freelist.empty()) {
118 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the buffer from its unique_ptr, so the frame carries a
// raw pointer; release_frame() wraps it in a unique_ptr again.
121 vf.data = freelist.top().release();
122 vf.size = frame_size;
123 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist. If the frame recorded any
// overflow, the overflow byte count is printed first.
128 void MallocFrameAllocator::release_frame(Frame frame)
130 if (frame.overflow > 0) {
131 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
// Same mutex as alloc_frame(); guards every access to the freelist.
133 unique_lock<mutex> lock(freelist_mutex);
// Re-wraps the raw pointer that alloc_frame() handed out.
134 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around. Both visible return
// statements answer true when b lies fewer than 0x8000 steps ahead of a; the
// second one first lifts b by one full 16-bit period (0x10000).
// NOTE(review): the conditions that select between the branches (and any
// handling of a == b) are not visible in this excerpt.
137 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
142 return (b - a < 0x8000);
144 int wrap_b = 0x10000 + int(b);
145 return (wrap_b - a < 0x8000);
// Appends a captured frame to the queue q and wakes a thread waiting on
// queues_not_empty. The whole operation runs under queue_lock.
// A frame whose timecode does not come after the timecode at the back of the
// queue (according to uint16_less_than_with_wraparound) is reported as
// "going backwards" and handed back to its owning allocator.
// NOTE(review): the lines after the release (presumably an early return) and
// the remaining qf assignments are not visible in this excerpt -- confirm.
149 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
151 unique_lock<mutex> lock(queue_lock);
152 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
153 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
154 q->back().timecode, timecode);
155 frame.owner->release_frame(frame);
161 qf.timecode = timecode;
163 q->push_back(move(qf));
164 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes the payload of a frame (everything after the first
// HEADER_SIZE bytes) to the file `filename`, and prints "short write!" when
// fwrite() does not write the whole item.
// NOTE(review): fp is passed to fwrite() directly after fopen() with no null
// check, and frame_len - HEADER_SIZE wraps around if frame_len < HEADER_SIZE.
167 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
169 FILE *fp = fopen(filename, "wb");
170 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
171 printf("short write!\n");
// Debug helper: writes the payload of an audio block (everything after the
// first AUDIO_HEADER_SIZE bytes) to audiofp. The fwrite() result is ignored.
// NOTE(review): audio_len - AUDIO_HEADER_SIZE wraps around if audio_len is
// smaller than AUDIO_HEADER_SIZE.
176 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
178 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Pairs queued video and audio frames by timecode
// and delivers them through frame_callback until dequeue_thread_should_quit
// is set. If has_dequeue_callbacks is set, dequeue_init_callback() runs before
// the loop and dequeue_cleanup_callback() after it.
// NOTE(review): several lines (printf arguments, else branches, any unlocking
// of queue_lock before callbacks) are not visible in this excerpt.
181 void BMUSBCapture::dequeue_thread_func()
183 if (has_dequeue_callbacks) {
184 dequeue_init_callback();
186 while (!dequeue_thread_should_quit) {
187 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues hold at least one frame, or until asked to quit.
188 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
190 if (dequeue_thread_should_quit) break;
192 uint16_t video_timecode = pending_video_frames.front().timecode;
193 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Case 1: the video frame is older than the oldest audio block. It is removed
// from the queue and its buffer is returned to the video allocator.
194 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
195 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
197 QueuedFrame video_frame = pending_video_frames.front();
198 pending_video_frames.pop_front();
200 video_frame_allocator->release_frame(video_frame.frame);
// Case 2: the audio block is older than the oldest video frame. It is
// delivered together with a default-constructed (empty) video frame.
201 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
202 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
204 QueuedFrame audio_frame = pending_audio_frames.front();
205 pending_audio_frames.pop_front();
207 frame_callback(audio_timecode,
208 FrameAllocator::Frame(), 0, VideoFormat(),
209 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Case 3 (neither timecode precedes the other): both frames are dequeued
// and delivered as a pair.
211 QueuedFrame video_frame = pending_video_frames.front();
212 QueuedFrame audio_frame = pending_audio_frames.front();
213 pending_audio_frames.pop_front();
214 pending_video_frames.pop_front();
// Dumps both frames to disk.
// NOTE(review): the declaration of `filename` is not visible; this dump may be
// inside a disabled preprocessor section -- confirm.
219 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
220 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
221 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// If the format word decodes, deliver the real video frame with HEADER_SIZE
// as its offset; the second call delivers an empty video frame instead.
224 VideoFormat video_format;
225 if (decode_video_format(video_frame.format, &video_format)) {
226 frame_callback(video_timecode,
227 video_frame.frame, HEADER_SIZE, video_format,
228 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
230 frame_callback(video_timecode,
231 FrameAllocator::Frame(), 0, video_format,
232 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
236 if (has_dequeue_callbacks) {
237 dequeue_cleanup_callback();
// Called by decode_packs() (via cb_xfr) with a pointer just past a video sync
// marker. Queues the video frame collected so far, updates the assumed frame
// width from the new format word, and starts a fresh frame from the video
// allocator.
// `start` must point at no fewer than four readable bytes: bytes 0-1 are the
// little-endian timecode and bytes 2-3 the little-endian format word.
241 void BMUSBCapture::start_new_frame(const uint8_t *start)
243 uint16_t format = (start[3] << 8) | start[2];
244 uint16_t timecode = (start[1] << 8) | start[0];
// Only a frame that has actually received data is queued.
246 if (current_video_frame.len > 0) {
247 // If format is 0x0800 (no signal), add a fake (empty) audio
248 // frame to get it out of the queue.
249 // TODO: Figure out if there are other formats that come with
250 // no audio, and treat them the same.
251 if (format == 0x0800) {
252 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
253 if (fake_audio_frame.data == nullptr) {
254 // Oh well, it's just a no-signal frame anyway.
255 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
256 current_video_frame.owner->release_frame(current_video_frame);
257 current_video_frame = video_frame_allocator->alloc_frame();
260 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
263 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
265 // Update the assumed frame width. We might be one frame too late on format changes,
266 // but it's much better than asking the user to choose manually.
267 VideoFormat video_format;
268 if (decode_video_format(format, &video_format)) {
269 assumed_frame_width = video_format.width;
272 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
274 // //start[7], start[6], start[5], start[4],
275 // read_current_frame, FRAME_SIZE);
277 current_video_frame = video_frame_allocator->alloc_frame();
278 //if (current_video_frame.data == nullptr) {
279 // read_current_frame = -1;
281 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called with a pointer just past an
// audio sync marker. Queues the audio block collected so far (if it holds any
// data) and starts a fresh one from the audio allocator.
// `start` must point at no fewer than four readable bytes: bytes 0-1 are the
// little-endian timecode and bytes 2-3 the little-endian format word.
285 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
287 uint16_t format = (start[3] << 8) | start[2];
288 uint16_t timecode = (start[1] << 8) | start[0];
289 if (current_audio_frame.len > 0) {
290 //dump_audio_block();
291 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
293 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
294 // format, timecode, read_current_audio_block);
295 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as
// two-digit hex values, reading actual_length bytes from xfr->buffer starting
// at `offset`.
// NOTE(review): the separator logic around the (j % 8) test is only partly
// visible in this excerpt.
299 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
301 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
302 for (unsigned j = 0; j < pack->actual_length; j++) {
303 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
304 printf("%02x", xfr->buffer[j + offset]);
307 else if ((j % 8) == 7)
// Copies n source bytes into two destination buffers, walking the source two
// bytes per iteration.
// NOTE(review): the loop body is not visible in this excerpt; presumably
// even-indexed bytes go to dest1 and odd-indexed bytes to dest2 (this matches
// how add_to_frame() advances data/data2 by len / 2) -- confirm.
315 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
318 uint8_t *dptr1 = dest1;
319 uint8_t *dptr2 = dest2;
321 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to the frame being assembled.
// If the bytes do not fit in the remaining capacity, the excess is recorded
// in current_frame->overflow and len is pinned to size; an overflow above
// 1048576 bytes is printed (tagged with frame_type_name) and reset to zero.
// For interleaved frames the data is split between data and data2 through
// memcpy_interleaved(); otherwise it is copied contiguously after the
// current length.
// NOTE(review): the rest of the initial guard condition, the handling of odd
// lengths in the interleaved path and any early returns are not visible in
// this excerpt.
327 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
329 if (current_frame->data == nullptr ||
330 current_frame->len > current_frame->size ||
335 int bytes = end - start;
336 if (current_frame->len + bytes > current_frame->size) {
337 current_frame->overflow = current_frame->len + bytes - current_frame->size;
338 current_frame->len = current_frame->size;
339 if (current_frame->overflow > 1048576) {
340 printf("%d bytes overflow after last %s frame\n",
341 int(current_frame->overflow), frame_type_name);
342 current_frame->overflow = 0;
346 if (current_frame->interleaved) {
// Each plane has received half of the bytes appended so far.
347 uint8_t *data = current_frame->data + current_frame->len / 2;
348 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
349 if (current_frame->len % 2 == 1) {
353 if (bytes % 2 == 1) {
356 ++current_frame->len;
359 memcpy_interleaved(data, data2, start, bytes);
360 current_frame->len += bytes;
362 memcpy(current_frame->data + current_frame->len, start, bytes);
363 current_frame->len += bytes;
// Debug helper: prints `name` left-justified in a 10-character column, then
// each of the 32 bytes of the AVX2 register n (byte 0 first) as two-digit hex.
// The extraction is written out once per byte because the index passed to
// _mm256_extract_epi8 must be a compile-time constant.
371 void avx2_dump(const char *name, __m256i n)
373 printf("%-10s:", name);
// Bytes 0-7.
374 printf(" %02x", _mm256_extract_epi8(n, 0));
375 printf(" %02x", _mm256_extract_epi8(n, 1));
376 printf(" %02x", _mm256_extract_epi8(n, 2));
377 printf(" %02x", _mm256_extract_epi8(n, 3));
378 printf(" %02x", _mm256_extract_epi8(n, 4));
379 printf(" %02x", _mm256_extract_epi8(n, 5));
380 printf(" %02x", _mm256_extract_epi8(n, 6));
381 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
383 printf(" %02x", _mm256_extract_epi8(n, 8));
384 printf(" %02x", _mm256_extract_epi8(n, 9));
385 printf(" %02x", _mm256_extract_epi8(n, 10));
386 printf(" %02x", _mm256_extract_epi8(n, 11));
387 printf(" %02x", _mm256_extract_epi8(n, 12));
388 printf(" %02x", _mm256_extract_epi8(n, 13));
389 printf(" %02x", _mm256_extract_epi8(n, 14));
390 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
392 printf(" %02x", _mm256_extract_epi8(n, 16));
393 printf(" %02x", _mm256_extract_epi8(n, 17));
394 printf(" %02x", _mm256_extract_epi8(n, 18));
395 printf(" %02x", _mm256_extract_epi8(n, 19));
396 printf(" %02x", _mm256_extract_epi8(n, 20));
397 printf(" %02x", _mm256_extract_epi8(n, 21));
398 printf(" %02x", _mm256_extract_epi8(n, 22));
399 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
401 printf(" %02x", _mm256_extract_epi8(n, 24));
402 printf(" %02x", _mm256_extract_epi8(n, 25));
403 printf(" %02x", _mm256_extract_epi8(n, 26));
404 printf(" %02x", _mm256_extract_epi8(n, 27));
405 printf(" %02x", _mm256_extract_epi8(n, 28));
406 printf(" %02x", _mm256_extract_epi8(n, 29));
407 printf(" %02x", _mm256_extract_epi8(n, 30));
408 printf(" %02x", _mm256_extract_epi8(n, 31));
413 // Does a memcpy and memchr in one to reduce processing time.
414 // Note that the benefit is somewhat limited if your L3 cache is small,
415 // as you'll (unfortunately) spend most of the time loading the data
418 // Complicated cases are left to the slow path; it basically stops copying
419 // up until the first instance of "sync_char" (usually a bit before, actually).
420 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
421 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters: current_frame is the frame being assembled, [start, limit) the
// input bytes, sync_char the first byte of the sync marker to stop at.
// The final return statement yields the position up to which the vector loops
// consumed input; decode_packs() continues from the returned pointer.
// NOTE(review): early-return values, the preprocessor conditionals that choose
// between the 256-bit and 128-bit variants, and the loop tails (pointer
// increments, break handling) are not visible in this excerpt.
422 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
424 if (current_frame->data == nullptr ||
425 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes take a separate branch (body not visible).
429 size_t orig_bytes = limit - start;
430 if (orig_bytes < 128) {
435 // Don't read more bytes than we can write.
436 limit = min(limit, start + (current_frame->size - current_frame->len));
438 // Align end to 32 bytes.
439 limit = (const uint8_t *)(intptr_t(limit) & ~31);
441 if (start >= limit) {
445 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
446 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
447 if (aligned_start != start) {
// The unaligned head goes through the scalar add_to_frame(); if it contains
// sync_char, only the bytes before it are added.
448 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
449 if (sync_start == nullptr) {
450 add_to_frame(current_frame, "", start, aligned_start);
452 add_to_frame(current_frame, "", start, sync_start);
457 // Make the length a multiple of 64.
458 if (current_frame->interleaved) {
459 if (((limit - aligned_start) % 64) != 0) {
462 assert(((limit - aligned_start) % 64) == 0);
// 256-bit variant.
466 const __m256i needle = _mm256_set1_epi8(sync_char);
468 const __restrict __m256i *in = (const __m256i *)aligned_start;
469 if (current_frame->interleaved) {
470 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
471 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
472 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane, gathers the even-indexed bytes into the low half
// and the odd-indexed bytes into the high half.
476 __m256i shuffle_cw = _mm256_set_epi8(
477 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
478 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
479 while (in < (const __m256i *)limit) {
480 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
481 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
482 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Nonzero bytes in `found` mark positions equal to sync_char in either input.
484 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
485 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
486 __m256i found = _mm256_or_si256(found1, found2);
488 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
489 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
491 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
492 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1/data2, hi the high halves.
494 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
495 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
497 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
498 _mm256_storeu_si256(out2, hi);
500 if (!_mm256_testz_si256(found, found)) {
508 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved 256-bit path: straight copy, 32 bytes per iteration.
510 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
511 while (in < (const __m256i *)limit) {
512 __m256i data = _mm256_load_si256(in);
513 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
514 __m256i found = _mm256_cmpeq_epi8(data, needle);
515 if (!_mm256_testz_si256(found, found)) {
522 current_frame->len = (uint8_t *)out - current_frame->data;
// 128-bit variant of the same two loops.
525 const __m128i needle = _mm_set1_epi8(sync_char);
527 const __m128i *in = (const __m128i *)aligned_start;
528 if (current_frame->interleaved) {
529 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
530 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
531 if (current_frame->len % 2 == 1) {
// De-interleaves with mask/shift: the low byte of every 16-bit word goes to
// out1 and the high byte to out2, packed back down to bytes.
535 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
536 while (in < (const __m128i *)limit) {
537 __m128i data1 = _mm_load_si128(in);
538 __m128i data2 = _mm_load_si128(in + 1);
539 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
540 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
541 __m128i data1_hi = _mm_srli_epi16(data1, 8);
542 __m128i data2_hi = _mm_srli_epi16(data2, 8);
543 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
544 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
545 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
546 _mm_storeu_si128(out2, hi);
547 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
548 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
549 if (!_mm_testz_si128(found1, found1) ||
550 !_mm_testz_si128(found2, found2)) {
558 current_frame->len += (uint8_t *)in - aligned_start;
560 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
561 while (in < (const __m128i *)limit) {
562 __m128i data = _mm_load_si128(in);
563 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
564 __m128i found = _mm_cmpeq_epi8(data, needle);
565 if (!_mm_testz_si128(found, found)) {
572 current_frame->len = (uint8_t *)out - current_frame->data;
576 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
578 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload
// to *current_frame, and invokes start_callback each time sync_pattern is
// found. The callback receives a pointer to the byte just past the pattern.
// A packet whose status is not LIBUSB_TRANSFER_COMPLETED is reported on stderr.
// frame_type_name is only used in messages printed by add_to_frame().
// NOTE(review): the sync_length parameter line, the declaration of `offset`
// and some control flow (continue/break after errors, preprocessor guards
// around the fast path and dump_pack) are not visible in this excerpt.
582 void decode_packs(const libusb_transfer *xfr,
583 const char *sync_pattern,
585 FrameAllocator::Frame *current_frame,
586 const char *frame_type_name,
587 function<void(const uint8_t *start)> start_callback)
590 for (int i = 0; i < xfr->num_iso_packets; i++) {
591 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
593 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
594 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of the packet's slot in the buffer hold data.
599 const uint8_t *start = xfr->buffer + offset;
600 const uint8_t *limit = start + pack->actual_length;
601 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as far as possible; the fast path stops at or before the first
// byte equal to sync_pattern[0].
603 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
604 if (start == limit) break;
605 assert(start < limit);
// Exact search for the full sync pattern in what remains of this packet.
608 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
609 if (start_next_frame == nullptr) {
610 // add the rest of the buffer
611 add_to_frame(current_frame, frame_type_name, start, limit);
614 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
615 start = start_next_frame + sync_length; // skip sync
616 start_callback(start);
620 dump_pack(xfr, offset, pack);
// Advance by the packet's allotted length (not actual_length) to reach the
// next packet's slot in the transfer buffer.
622 offset += pack->length;
// libusb completion callback shared by the isochronous and control transfers.
// xfr->user_data must hold the owning BMUSBCapture instance.
// A transfer that did not complete successfully is reported and freed.
// Isochronous transfers are decoded into the current audio or video frame;
// control transfers feed the register-dump logic. The transfer is then
// submitted again.
// NOTE(review): early returns, else branches and preprocessor guards are not
// visible in this excerpt. `setup` is referenced although its only visible
// declaration is commented out, so that section may be compiled out -- confirm.
626 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
628 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
629 fprintf(stderr, "transfer status %d\n", xfr->status);
630 libusb_free_transfer(xfr);
634 assert(xfr->user_data != nullptr);
635 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
637 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio, delimited by a 20-byte text marker; the other
// decode call handles video, delimited by the 4-byte 00 00 ff ff marker.
638 if (xfr->endpoint == 0x84) {
639 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
641 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
643 // Update the transfer with the new assumed width, if we're in the process of changing formats.
644 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
647 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
648 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
649 uint8_t *buf = libusb_control_transfer_get_data(xfr);
651 if (setup->wIndex == 44) {
652 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
654 printf("read register %2d: 0x%02x%02x%02x%02x\n",
655 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and step to the next register, wrapping
// around at NUM_BMUSB_REGISTERS.
658 memcpy(usb->register_file + usb->current_register, buf, 4);
659 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
660 if (usb->current_register == 0) {
661 // read through all of them
662 printf("register dump:");
663 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
664 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the control transfer to read the next register.
668 libusb_fill_control_setup(xfr->buffer,
669 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
670 /*index=*/usb->current_register, /*length=*/4);
675 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
676 for (i = 0; i < xfr->actual_length; i++) {
677 printf("%02x", xfr->buffer[i]);
687 int rc = libusb_submit_transfer(xfr);
689 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Body of the USB thread: requests round-robin realtime scheduling
// (SCHED_RR, priority 1) for the calling thread, printing a message if that
// fails, then pumps libusb events until should_quit becomes true.
// NOTE(review): the declaration of `param` and the handling of a failing
// libusb_handle_events() are not visible in this excerpt.
// NOTE(review): "¶m" in the two calls below looks like a mis-encoded
// "&param" (an HTML-entity extraction artifact); it will not compile as
// written -- confirm against the upstream file.
694 void BMUSBCapture::usb_thread_func()
697 memset(¶m, 0, sizeof(param));
698 param.sched_priority = 1;
699 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
700 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
702 while (!should_quit) {
703 int rc = libusb_handle_events(nullptr);
704 if (rc != LIBUSB_SUCCESS)
// One supported capture card found during enumeration in open_card().
// NOTE(review): only the `device` member is visible in this excerpt;
// open_card() initializes the struct as { product id, bus, port, device }.
709 struct USBCardDevice {
712 libusb_device *device;
// Enumerates USB devices, keeps those matching the two supported
// vendor/product ID pairs, sorts them into a stable order and opens the one
// at position card_index. Every card found is listed on stderr.
// NOTE(review): error-path exits, the return statement and how *description
// is filled in are not visible in this excerpt.
715 libusb_device_handle *open_card(int card_index, string *description)
717 libusb_device **devices;
718 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
719 if (num_devices == -1) {
720 fprintf(stderr, "Error finding USB devices\n");
723 vector<USBCardDevice> found_cards;
724 for (ssize_t i = 0; i < num_devices; ++i) {
725 libusb_device_descriptor desc;
726 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
727 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
731 uint8_t bus = libusb_get_bus_number(devices[i]);
732 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are unreferenced
// here; matching devices keep their reference until the end of the function.
734 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
735 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
736 libusb_unref_device(devices[i]);
740 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free only the list, leaving device references to be
// dropped individually (see the unref calls above and at the end).
742 libusb_free_device_list(devices, 0);
744 // Sort the devices to get a consistent ordering.
// Compares by product first; bus and port comparisons follow.
// NOTE(review): the condition guarding the bus comparison is not visible.
745 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
746 if (a.product != b.product)
747 return a.product < b.product;
749 return a.bus < b.bus;
750 return a.port < b.port;
753 for (size_t i = 0; i < found_cards.size(); ++i) {
754 const char *product_name = nullptr;
755 if (found_cards[i].product == 0xbd3b) {
756 product_name = "Intensity Shuttle";
757 } else if (found_cards[i].product == 0xbd4f) {
758 product_name = "UltraStudio SDI";
// NOTE(review): the value printed under the "Device" label is the port number.
764 snprintf(buf, sizeof(buf), "Card %d: Bus %03u Device %03u %s",
765 int(i), found_cards[i].bus, found_cards[i].port, product_name);
766 if (i == size_t(card_index)) {
769 fprintf(stderr, "%s\n", buf);
772 if (size_t(card_index) >= found_cards.size()) {
773 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
777 libusb_device_handle *devh;
778 int rc = libusb_open(found_cards[card_index].device, &devh);
780 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references kept on the matching devices during enumeration.
784 for (size_t i = 0; i < found_cards.size(); ++i) {
785 libusb_unref_device(found_cards[i].device);
// One-time setup of a capture card: installs default frame allocators if the
// caller supplied none, starts the dequeue thread, initializes libusb, opens
// the card selected by card_index, prints its configuration descriptor,
// configures it through vendor control requests, and allocates the
// isochronous transfers that start_bm_capture() later submits.
// NOTE(review): error-path exits, the declaration of rc, the `ctrl` struct and
// any preprocessor conditionals (several sections here look like debug code
// that may be compiled out) are not visible in this excerpt.
791 void BMUSBCapture::configure_card()
793 if (video_frame_allocator == nullptr) {
794 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
796 if (audio_frame_allocator == nullptr) {
797 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
799 dequeue_thread_should_quit = false;
800 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
803 struct libusb_transfer *xfr;
805 rc = libusb_init(nullptr);
807 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
811 libusb_device_handle *devh = open_card(card_index, &description);
813 fprintf(stderr, "Error finding USB device\n");
817 libusb_config_descriptor *config;
818 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
820 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Print every interface, alternate setting and endpoint address of the
// configuration.
825 printf("%d interface\n", config->bNumInterfaces);
826 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
827 printf(" interface %d\n", interface_number);
828 const libusb_interface *interface = &config->interface[interface_number];
829 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
830 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
831 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
832 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
833 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
834 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
840 rc = libusb_set_configuration(devh, /*configuration=*/1);
842 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
846 rc = libusb_claim_interface(devh, 0);
848 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
852 // Alternate setting 1 is output, alternate setting 2 is input.
853 // Card is reset when switching alternates, so the driver uses
854 // this “double switch” when it wants to reset.
856 // There's also alternate settings 3 and 4, which seem to be
857 // like 1 and 2 except they advertise less bandwidth needed.
858 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
860 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
863 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
865 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
// NOTE(review): the next two calls (back to alternate 1, claim interface 3)
// may sit inside disabled preprocessor sections -- confirm.
869 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
871 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
877 rc = libusb_claim_interface(devh, 3);
879 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
885 // 44 is some kind of timer register (first 16 bits count upwards)
886 // 24 is some sort of watchdog?
887 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
888 // (or will go to 0x73c60010?), also seen 0x73c60100
889 // 12 also changes all the time, unclear why
890 // 16 seems to be autodetected mode somehow
891 // -- this is e00115e0 after reset?
892 // ed0115e0 after mode change [to output?]
893 // 2d0015e0 after more mode change [to input]
894 // ed0115e0 after more mode change
895 // 2d0015e0 after more mode change
897 // 390115e0 seems to indicate we have signal
898 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
900 // 200015e0 on startup
901 // changes to 250115e0 when we sync to the signal
903 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
905 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
907 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
908 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
910 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
911 // perhaps some of them are related to analog output?
913 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
914 // but the driver sets it to 0x8036802a at some point.
916 // all of this is on request 214/215. other requests (192, 219,
917 // 222, 223, 224) are used for firmware upgrade. Probably best to
918 // stay out of it unless you know what you're doing.
922 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
925 // 0x01 - stable signal
927 // 0x08 - unknown (audio??)
// Control requests issued in order by the loop below. Each entry is
// { direction, request, register index, 32-bit data }.
937 static const ctrl ctrls[] = {
938 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
939 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
941 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
943 // clearing the 0x08000000 bit seems to change the capture format (other source?)
944 // 0x10000000 = analog audio instead of embedded audio, it seems
945 // 0x3a000000 = component video? (analog audio)
946 // 0x3c000000 = composite video? (analog audio)
947 // 0x3e000000 = s-video? (analog audio)
948 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
949 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
950 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
951 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
952 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
955 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is converted to network (big-endian) byte order before being
// copied into the 4-byte transfer buffer.
956 uint32_t flipped = htonl(ctrls[req].data);
957 static uint8_t value[4];
958 memcpy(value, &flipped, sizeof(flipped));
959 int size = sizeof(value);
960 //if (ctrls[req].request == 215) size = 0;
// Synchronous control transfer; timeout 0 means no timeout.
961 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
962 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
964 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte transfer on register 16 is printed as the firmware version,
// taken from the last two bytes of the buffer.
968 if (ctrls[req].index == 16 && rc == 4) {
969 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
973 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
974 for (int i = 0; i < rc; ++i) {
975 printf("%02x", value[i]);
984 static int my_index = 0;
985 static uint8_t value[4];
986 int size = sizeof(value);
987 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
988 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
990 fprintf(stderr, "Error on control\n");
993 printf("rc=%d index=%d: 0x", rc, my_index);
994 for (int i = 0; i < rc; ++i) {
995 printf("%02x", value[i]);
1002 // set up an asynchronous transfer of the timer register
1003 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1004 static int completed = 0;
1006 xfr = libusb_alloc_transfer(0);
1007 libusb_fill_control_setup(cmdbuf,
1008 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1009 /*index=*/44, /*length=*/4);
// The user_data passed here (&completed) is overwritten with `this` on the
// next line; cb_xfr() casts user_data to BMUSBCapture *.
1010 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1011 xfr->user_data = this;
1012 libusb_submit_transfer(xfr);
1014 // set up an asynchronous transfer of register 24
1015 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1016 static int completed2 = 0;
1018 xfr = libusb_alloc_transfer(0);
1019 libusb_fill_control_setup(cmdbuf2,
1020 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1021 /*index=*/24, /*length=*/4);
1022 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1023 xfr->user_data = this;
1024 libusb_submit_transfer(xfr);
1027 // set up an asynchronous transfer of the register dump
1028 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1029 static int completed3 = 0;
1031 xfr = libusb_alloc_transfer(0);
1032 libusb_fill_control_setup(cmdbuf3,
1033 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1034 /*index=*/current_register, /*length=*/4);
1035 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1036 xfr->user_data = this;
1037 //libusb_submit_transfer(xfr);
1039 audiofp = fopen("audio.raw", "wb");
1041 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4; cb_xfr() treats endpoint 0x84 as audio.
1042 for (int e = 3; e <= 4; ++e) {
1043 //int num_transfers = (e == 3) ? 6 : 6;
1044 int num_transfers = 10;
1045 for (int i = 0; i < num_transfers; ++i) {
1047 int num_iso_pack, size;
1049 // Allocate for minimum width (because that will give us the most
1050 // number of packets, so we don't need to reallocate), but we'll
1051 // default to 720p for the first frame.
1052 size = find_xfer_size_for_width(MIN_WIDTH);
1053 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1054 buf_size = USB_VIDEO_TRANSFER_SIZE;
1058 buf_size = num_iso_pack * size;
1060 assert(size_t(num_iso_pack * size) <= buf_size);
1061 uint8_t *buf = new uint8_t[buf_size];
1063 xfr = libusb_alloc_transfer(num_iso_pack);
1065 fprintf(stderr, "oom\n");
1069 int ep = LIBUSB_ENDPOINT_IN | e;
1070 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1071 num_iso_pack, cb_xfr, nullptr, 0);
1072 libusb_set_iso_packet_lengths(xfr, size);
1073 xfr->user_data = this;
// Re-slice for the currently assumed width.
// NOTE(review): presumably applied to video transfers only -- the enclosing
// condition is not visible.
1076 change_xfer_size_for_width(assumed_frame_width, xfr);
// Kept so that start_bm_capture() can submit them.
1079 iso_xfrs.push_back(xfr);
// Submits every transfer stored in iso_xfrs to libusb, one by one.
// The fprintf below reports a failed submission on stderr with the endpoint
// address, the counter i and libusb's symbolic name for the return code.
// NOTE(review): several short lines of this function (braces, the declaration
// and increment of i, the condition guarding the error report) are not visible
// in this listing; presumably the report only fires when rc signals failure —
// confirm against the full file.
1084 void BMUSBCapture::start_bm_capture()
1087 for (libusb_transfer *xfr : iso_xfrs) {
1088 int rc = libusb_submit_transfer(xfr);
1091 //printf("num_bytes=%d\n", num_bytes);
1092 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1093 xfr->endpoint, i, libusb_error_name(rc));
// Teardown of interface 0 and of the default libusb context.
// NOTE(review): these calls sit directly after the submit loop; presumably they
// are disabled or only reached on an exit path — confirm against the full file.
1100 libusb_release_interface(devh, 0);
1104 libusb_exit(nullptr);
// Shuts down the dequeue thread: raises dequeue_thread_should_quit, wakes all
// waiters on queues_not_empty (presumably so the thread re-checks the flag —
// confirm in the thread function), then blocks until the thread has joined.
1109 void BMUSBCapture::stop_dequeue_thread()
1111 dequeue_thread_should_quit = true;
1112 queues_not_empty.notify_all();
1113 dequeue_thread.join();
// Clears the file-level should_quit flag and starts usb_thread running
// usb_thread_func.
// NOTE(review): usb_thread_func is handed to std::thread without an object
// argument, so it has to be a static member function — confirm in the class
// declaration.
1116 void BMUSBCapture::start_bm_thread()
1118 should_quit = false;
1119 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread().
// NOTE(review): the body is not visible in this listing; presumably it sets
// should_quit and joins usb_thread — confirm against the full file.
1122 void BMUSBCapture::stop_bm_thread()
// One row of the lookup table in decode_video_format(): maps a normalized
// format code to the geometry and frame rate copied into a VideoFormat.
// NOTE(review): the table rows carry a trailing bool that is read back as
// entry.interlaced; that member's declaration is not visible in this listing.
1128 struct VideoFormatEntry {
// Format code compared against the input after its flag bits were masked off.
1129 uint16_t normalized_video_format;
// Picture size, plus the value copied to VideoFormat::second_field_start
// (0 in every progressive row of the table).
1130 unsigned width, height, second_field_start;
// Values copied to VideoFormat::extra_lines_top / extra_lines_bottom.
1131 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate expressed as the fraction frame_rate_nom / frame_rate_den
// (e.g. 60000 / 1001).
1132 unsigned frame_rate_nom, frame_rate_den;
// Fills *decoded_video_format from the raw 16-bit video_format code.
// Cases are handled in this order:
//   1. 0x0800: no signal (720x525 pseudo-frames at 3013/100 Hz).
//   2. Codes without all of the 0xe800 bits set: not a video format.
//   3. The NTSC and PAL code sets, which are matched on the unmasked code.
//   4. A table lookup on the code with the 0xe80c bits cleared.
//   5. Anything else: reported as unknown and assumed to be 720p60.
// NOTE(review): the return statements (and closing braces) are not visible in
// this listing — confirm which paths return true/false before relying on the
// result.
1136 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
// Defaults that the branches below override where they know better.
1138 decoded_video_format->id = video_format;
1139 decoded_video_format->interlaced = false;
1141 // TODO: Add these for all formats as we find them.
1142 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
1144 if (video_format == 0x0800) {
1145 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
1146 // It's a strange thing, but what can you do.
1147 decoded_video_format->width = 720;
1148 decoded_video_format->height = 525;
1149 decoded_video_format->extra_lines_top = 0;
1150 decoded_video_format->extra_lines_bottom = 0;
1151 decoded_video_format->frame_rate_nom = 3013;
1152 decoded_video_format->frame_rate_den = 100;
1153 decoded_video_format->has_signal = false;
// Codes that lack any of the 0xe800 bits are treated as non-video: zero-sized
// picture, nominal 60 Hz, no signal.
1156 if ((video_format & 0xe800) != 0xe800) {
1157 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
1159 decoded_video_format->width = 0;
1160 decoded_video_format->height = 0;
1161 decoded_video_format->extra_lines_top = 0;
1162 decoded_video_format->extra_lines_bottom = 0;
1163 decoded_video_format->frame_rate_nom = 60;
1164 decoded_video_format->frame_rate_den = 1;
1165 decoded_video_format->has_signal = false;
// From here on the code is taken to describe a real input signal.
1169 decoded_video_format->has_signal = true;
1171 // NTSC (480i59.94, I suppose). A special case, see below.
1172 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
1173 decoded_video_format->width = 720;
1174 decoded_video_format->height = 480;
1175 decoded_video_format->extra_lines_top = 17;
1176 decoded_video_format->extra_lines_bottom = 28;
1177 decoded_video_format->frame_rate_nom = 30000;
1178 decoded_video_format->frame_rate_den = 1001;
1179 decoded_video_format->second_field_start = 280;
1180 decoded_video_format->interlaced = true;
1184 // PAL (576i50, I suppose). A special case, see below.
1185 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
1186 decoded_video_format->width = 720;
1187 decoded_video_format->height = 576;
1188 decoded_video_format->extra_lines_top = 22;
1189 decoded_video_format->extra_lines_bottom = 27;
1190 decoded_video_format->frame_rate_nom = 25;
1191 decoded_video_format->frame_rate_den = 1;
1192 decoded_video_format->second_field_start = 335;
1193 decoded_video_format->interlaced = true;
1197 // 0x8 seems to be a flag about availability of deep color on the input,
1198 // except when it's not (e.g. it's the only difference between NTSC
1199 // and PAL). Rather confusing. But we clear it here nevertheless, because
1200 // usually it doesn't mean anything.
1202 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask also strips the 0xe800 bits that were verified above, so the table
// keys below only hold the remaining bits.
1203 uint16_t normalized_video_format = video_format & ~0xe80c;
// Row layout: code, width, height, second_field_start, extra_lines_top,
// extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
1204 constexpr VideoFormatEntry entries[] = {
1205 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
1206 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
1207 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
1208 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
1209 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1210 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
1211 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
1212 { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
1213 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
1214 { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
1215 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
1216 { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
1217 { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080i50 (marked interlaced at 25 frames/s; previously labelled 1080p50, and the field/extra-line values are still 0 — TODO confirm).
1218 { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
1219 { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
// Linear scan; a matching row overwrites the defaults set at the top.
1221 for (const VideoFormatEntry &entry : entries) {
1222 if (normalized_video_format == entry.normalized_video_format) {
1223 decoded_video_format->width = entry.width;
1224 decoded_video_format->height = entry.height;
1225 decoded_video_format->second_field_start = entry.second_field_start;
1226 decoded_video_format->extra_lines_top = entry.extra_lines_top;
1227 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
1228 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
1229 decoded_video_format->frame_rate_den = entry.frame_rate_den;
1230 decoded_video_format->interlaced = entry.interlaced;
// Fallback for codes that matched nothing above: log both the raw and the
// normalized code and assume 1280x720 at 60/1.
1235 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
1236 decoded_video_format->width = 1280;
1237 decoded_video_format->height = 720;
1238 decoded_video_format->frame_rate_nom = 60;
1239 decoded_video_format->frame_rate_den = 1;