1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Frame geometry and buffer sizing for the capture path.
// NOTE(review): HEIGHT appears to include non-picture lines (see the trailing
// comment); it is only referenced from commented-out FRAME_SIZE formulas here.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Bytes skipped at the start of a video frame before the payload
// (used by dump_frame() and passed as the video offset to frame_callback).
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Bytes skipped at the start of an audio block before the payload.
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed 8 MiB per video frame buffer, used instead of the geometry-derived
// sizes above; passed to the default video MallocFrameAllocator.
46 #define FRAME_SIZE (8 << 20)
// Stop flag for the USB event loop: cleared in start_bm_thread() and polled
// by usb_thread_func() on every iteration.
51 atomic<bool> should_quit;
// Out-of-line, empty destructor definition for the FrameAllocator interface.
53 FrameAllocator::~FrameAllocator() {}
55 // Audio is more important than video, and also much cheaper.
56 // By having many more audio frames available, hopefully if something
57 // starts to drop, we'll have CPU load go down (from not having to
58 // process as much video) before we have to drop audio.
// These counts are passed as num_queued_frames to the default
// MallocFrameAllocator instances created in configure_card().
59 #define NUM_QUEUED_VIDEO_FRAMES 16
60 #define NUM_QUEUED_AUDIO_FRAMES 64
// Default FrameAllocator implementation: hands out buffers from a freelist
// that is filled once, at construction time.
62 class MallocFrameAllocator : public FrameAllocator {
// Preallocates num_queued_frames buffers of frame_size bytes each.
64 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
// Takes a buffer from the freelist; logs a frame overrun when it is empty.
65 Frame alloc_frame() override;
// Puts the frame's buffer back on the freelist.
66 void release_frame(Frame frame) override;
// Spare buffers currently available to alloc_frame(); accessed under freelist_mutex.
72 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers the per-frame size and fills the freelist with
// num_queued_frames freshly allocated buffers of that size.
75 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
76 : frame_size(frame_size)
78 for (size_t i = 0; i < num_queued_frames; ++i) {
79 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist, under freelist_mutex.
// An empty freelist is reported as a frame overrun on stdout.
83 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
88 unique_lock<mutex> lock(freelist_mutex); // Meh.
89 if (freelist.empty()) {
90 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() moves ownership of the raw buffer into the frame; the now-empty
// unique_ptr left on the stack is then discarded by pop().
93 vf.data = freelist.top().release();
95 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist so alloc_frame() can reuse it.
// A non-zero overflow count on the frame is reported on stdout first.
100 void MallocFrameAllocator::release_frame(Frame frame)
102 if (frame.overflow > 0) {
103 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
// Re-wrap the raw pointer in a unique_ptr so the freelist owns it again.
// NOTE(review): frame.data is pushed without a null check — confirm callers
// never release a frame whose allocation failed.
105 unique_lock<mutex> lock(freelist_mutex);
106 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if timecode a comes strictly before b, treating the 16-bit
// counter as circular: b counts as "after" a when it is between 1 and 0x7fff
// steps ahead of a (modulo 0x10000). Equal values are never "less than".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	} else if (a < b) {
		// No wrap between a and b; compare the plain distance.
		return (b - a < 0x8000);
	} else {
		// b is numerically smaller, so it can only be "after" a if the
		// counter wrapped; measure the distance past the wrap point.
		int wrap_b = 0x10000 + int(b);
		return (wrap_b - a < 0x8000);
	}
}
// Appends a completed frame to queue q (video or audio) under queue_lock and
// wakes the dequeue thread.
//   format, timecode: values parsed from the block header by the caller.
//   frame:            the buffer to enqueue.
//   q:                destination queue.
121 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
123 unique_lock<mutex> lock(queue_lock);
// Timecodes must move strictly forward (with 16-bit wraparound) relative to
// the newest queued entry; otherwise the frame is logged as dropped and its
// buffer is handed back to the allocator that owns it.
124 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
125 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
126 q->back().timecode, timecode);
127 frame.owner->release_frame(frame);
133 qf.timecode = timecode;
135 q->push_back(move(qf));
// The waiter needs both queues non-empty, so this wake-up may find nothing to do.
136 queues_not_empty.notify_one(); // might be spurious
139 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
141 FILE *fp = fopen(filename, "wb");
142 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
143 printf("short write!\n");
148 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
150 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: matches queued video and audio frames by
// timecode and passes them to frame_callback until dequeue_thread_should_quit
// is set. Optional init/cleanup callbacks run at the start and end.
153 void BMUSBCapture::dequeue_thread_func()
155 if (has_dequeue_callbacks) {
156 dequeue_init_callback();
158 while (!dequeue_thread_should_quit) {
159 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues hold at least one frame, or shutdown is requested.
160 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
162 if (dequeue_thread_should_quit) break;
164 uint16_t video_timecode = pending_video_frames.front().timecode;
165 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Oldest video precedes the oldest audio: release its buffer and drop it.
166 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
167 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
169 video_frame_allocator->release_frame(pending_video_frames.front().frame);
170 pending_video_frames.pop_front();
// Oldest audio precedes the oldest video: deliver the audio together with a
// default-constructed (blank) video frame.
171 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
172 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
174 QueuedFrame audio_frame = pending_audio_frames.front();
175 pending_audio_frames.pop_front();
177 frame_callback(audio_timecode,
178 FrameAllocator::Frame(), 0, 0x0000,
179 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Presumably the remaining (matching timecode) branch: both fronts are
// popped and delivered as a pair.
181 QueuedFrame video_frame = pending_video_frames.front();
182 QueuedFrame audio_frame = pending_audio_frames.front();
183 pending_audio_frames.pop_front();
184 pending_video_frames.pop_front();
// Debug dump of the pair to disk; the file name is built from format and timecode.
189 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
190 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
191 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The offsets tell the callback how many header bytes precede each payload.
194 frame_callback(video_timecode,
195 video_frame.frame, HEADER_SIZE, video_frame.format,
196 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
199 if (has_dequeue_callbacks) {
200 dequeue_cleanup_callback();
// Called when a video sync marker has been found: queues the video frame
// collected so far (if it has any data) and starts a fresh one.
//   start: points just past the sync marker; its first four bytes are read
//          as little-endian timecode (bytes 0-1) and format (bytes 2-3).
204 void BMUSBCapture::start_new_frame(const uint8_t *start)
206 uint16_t format = (start[3] << 8) | start[2];
207 uint16_t timecode = (start[1] << 8) | start[0];
209 if (current_video_frame.len > 0) {
210 // If format is 0x0800 (no signal), add a fake (empty) audio
211 // frame to get it out of the queue.
212 // TODO: Figure out if there are other formats that come with
213 // no audio, and treat them the same.
214 if (format == 0x0800) {
215 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
216 if (fake_audio_frame.data == nullptr) {
217 // Oh well, it's just a no-signal frame anyway.
218 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
// The dropped video buffer goes back to its allocator and is replaced with a new one.
219 current_video_frame.owner->release_frame(current_video_frame);
220 current_video_frame = video_frame_allocator->alloc_frame();
223 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
226 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
228 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
230 // //start[7], start[6], start[5], start[4],
231 // read_current_frame, FRAME_SIZE);
// Begin collecting the next frame into a newly allocated buffer.
233 current_video_frame = video_frame_allocator->alloc_frame();
234 //if (current_video_frame.data == nullptr) {
235 // read_current_frame = -1;
237 // read_current_frame = 0;
// Called when an audio sync marker has been found: queues the audio block
// collected so far (if it has any data) and starts a fresh one.
//   start: points just past the sync marker; its first four bytes are read
//          as little-endian timecode (bytes 0-1) and format (bytes 2-3).
241 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
243 uint16_t format = (start[3] << 8) | start[2];
244 uint16_t timecode = (start[1] << 8) | start[0];
245 if (current_audio_frame.len > 0) {
246 //dump_audio_block();
247 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
249 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
250 // format, timecode, read_current_audio_block);
// Begin collecting the next block into a newly allocated buffer.
251 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//   xfr:    transfer whose buffer holds the packet data.
//   offset: byte offset of this packet within xfr->buffer.
//   pack:   descriptor giving the number of bytes actually received.
255 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
257 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
258 for (unsigned j = 0; j < pack->actual_length; j++) {
259 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
260 printf("%02x", xfr->buffer[j + offset]);
// Separator handling keyed on the position within each group of eight bytes.
263 else if ((j % 8) == 7)
// Copies n bytes from src, consuming them two at a time and writing through
// two separate destination cursors.
// NOTE(review): the loop body is not visible here; presumably even-indexed
// bytes go to dest1 and odd-indexed bytes to dest2 — confirm.
271 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
274 uint8_t *dptr1 = dest1;
275 uint8_t *dptr2 = dest2;
277 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to current_frame. Interleaved frames are
// split across data/data2; other frames get one flat memcpy into data.
//   frame_type_name: label used in the overflow log message.
283 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard against a missing buffer or a length that already exceeds the buffer size.
285 if (current_frame->data == nullptr ||
286 current_frame->len > current_frame->size ||
291 int bytes = end - start;
// Input that would not fit: record the excess in overflow and pin len to size.
292 if (current_frame->len + bytes > current_frame->size) {
293 current_frame->overflow = current_frame->len + bytes - current_frame->size;
294 current_frame->len = current_frame->size;
// An overflow above 1 MiB is logged and the counter is reset.
295 if (current_frame->overflow > 1048576) {
296 printf("%d bytes overflow after last %s frame\n",
297 int(current_frame->overflow), frame_type_name);
298 current_frame->overflow = 0;
302 if (current_frame->interleaved) {
// Each output plane receives half of the bytes, hence the len / 2 offsets.
303 uint8_t *data = current_frame->data + current_frame->len / 2;
304 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length / odd byte count are special-cased before the bulk
// copy (details not visible in this listing).
305 if (current_frame->len % 2 == 1) {
309 if (bytes % 2 == 1) {
312 ++current_frame->len;
315 memcpy_interleaved(data, data2, start, bytes);
316 current_frame->len += bytes;
// Non-interleaved frames: plain append.
318 memcpy(current_frame->data + current_frame->len, start, bytes);
319 current_frame->len += bytes;
// Debug helper: prints name (left-aligned, padded to 10 characters) followed
// by all 32 bytes of n in hex. The byte index is written out as a literal in
// every _mm256_extract_epi8 call rather than looped over.
327 void avx2_dump(const char *name, __m256i n)
329 printf("%-10s:", name);
// Bytes 0-7.
330 printf(" %02x", _mm256_extract_epi8(n, 0));
331 printf(" %02x", _mm256_extract_epi8(n, 1));
332 printf(" %02x", _mm256_extract_epi8(n, 2));
333 printf(" %02x", _mm256_extract_epi8(n, 3));
334 printf(" %02x", _mm256_extract_epi8(n, 4));
335 printf(" %02x", _mm256_extract_epi8(n, 5));
336 printf(" %02x", _mm256_extract_epi8(n, 6));
337 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
339 printf(" %02x", _mm256_extract_epi8(n, 8));
340 printf(" %02x", _mm256_extract_epi8(n, 9));
341 printf(" %02x", _mm256_extract_epi8(n, 10));
342 printf(" %02x", _mm256_extract_epi8(n, 11));
343 printf(" %02x", _mm256_extract_epi8(n, 12));
344 printf(" %02x", _mm256_extract_epi8(n, 13));
345 printf(" %02x", _mm256_extract_epi8(n, 14));
346 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
348 printf(" %02x", _mm256_extract_epi8(n, 16));
349 printf(" %02x", _mm256_extract_epi8(n, 17));
350 printf(" %02x", _mm256_extract_epi8(n, 18));
351 printf(" %02x", _mm256_extract_epi8(n, 19));
352 printf(" %02x", _mm256_extract_epi8(n, 20));
353 printf(" %02x", _mm256_extract_epi8(n, 21));
354 printf(" %02x", _mm256_extract_epi8(n, 22));
355 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
357 printf(" %02x", _mm256_extract_epi8(n, 24));
358 printf(" %02x", _mm256_extract_epi8(n, 25));
359 printf(" %02x", _mm256_extract_epi8(n, 26));
360 printf(" %02x", _mm256_extract_epi8(n, 27));
361 printf(" %02x", _mm256_extract_epi8(n, 28));
362 printf(" %02x", _mm256_extract_epi8(n, 29));
363 printf(" %02x", _mm256_extract_epi8(n, 30));
364 printf(" %02x", _mm256_extract_epi8(n, 31));
369 // Does a memcpy and memchr in one to reduce processing time.
370 // Note that the benefit is somewhat limited if your L3 cache is small,
371 // as you'll (unfortunately) spend most of the time loading the data
374 // Complicated cases are left to the slow path; it basically stops copying
375 // up until the first instance of "sync_char" (usually a bit before, actually).
376 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
377 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame: frame being filled; its len is advanced by what was copied.
//   start, limit:  input range [start, limit) to consume.
//   sync_char:     byte value whose appearance ends the fast path.
// Returns a pointer to the first input byte that was not consumed, so the
// caller can continue from there on the slow path.
378 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same guard as add_to_frame(): no buffer, or len already past size.
380 if (current_frame->data == nullptr ||
381 current_frame->len > current_frame->size ||
// Inputs under 128 bytes are special-cased (handling not visible in this listing).
385 size_t orig_bytes = limit - start;
386 if (orig_bytes < 128) {
391 // Don't read more bytes than we can write.
392 limit = min(limit, start + (current_frame->size - current_frame->len));
394 // Align end to 32 bytes.
395 limit = (const uint8_t *)(intptr_t(limit) & ~31);
397 if (start >= limit) {
401 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
402 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
403 if (aligned_start != start) {
// The unaligned head goes through the scalar path; a sync byte found in it
// truncates the copy at that byte.
404 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
405 if (sync_start == nullptr) {
406 add_to_frame(current_frame, "", start, aligned_start);
408 add_to_frame(current_frame, "", start, sync_start);
413 // Make the length a multiple of 64.
414 if (current_frame->interleaved) {
415 if (((limit - aligned_start) % 64) != 0) {
418 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit variant: every byte of needle equals sync_char. ----
422 const __m256i needle = _mm256_set1_epi8(sync_char);
424 const __restrict __m256i *in = (const __m256i *)aligned_start;
425 if (current_frame->interleaved) {
// Two output planes, each receiving half of the input bytes.
426 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
427 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
428 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: even-indexed bytes are gathered into the low half and
// odd-indexed bytes into the high half.
432 __m256i shuffle_cw = _mm256_set_epi8(
433 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
434 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
435 while (in < (const __m256i *)limit) {
436 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
437 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
438 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Search both input vectors for the sync byte before they are shuffled.
440 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
441 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
442 __m256i found = _mm256_or_si256(found1, found2);
444 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
445 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
447 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
448 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1/data2, hi the high halves.
450 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
451 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
453 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
454 _mm256_storeu_si256(out2, hi);
// Any match in this 64-byte chunk is acted on here (handling not visible).
456 if (!_mm256_testz_si256(found, found)) {
464 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copy while checking for the sync byte.
466 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
467 while (in < (const __m256i *)limit) {
468 __m256i data = _mm256_load_si256(in);
469 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
470 __m256i found = _mm256_cmpeq_epi8(data, needle);
471 if (!_mm256_testz_si256(found, found)) {
478 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit variant of the same logic. ----
// NOTE(review): presumably selected by a preprocessor conditional that is
// not visible in this listing — confirm.
481 const __m128i needle = _mm_set1_epi8(sync_char);
483 const __m128i *in = (const __m128i *)aligned_start;
484 if (current_frame->interleaved) {
485 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
486 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
487 if (current_frame->len % 2 == 1) {
// Mask keeping the low byte of every 16-bit element.
491 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
492 while (in < (const __m128i *)limit) {
493 __m128i data1 = _mm_load_si128(in);
494 __m128i data2 = _mm_load_si128(in + 1);
// Low bytes come from masking, high bytes from a right shift by 8; each set
// is then packed from 16-bit elements back down to bytes.
495 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
496 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
497 __m128i data1_hi = _mm_srli_epi16(data1, 8);
498 __m128i data2_hi = _mm_srli_epi16(data2, 8);
499 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
500 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
501 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
502 _mm_storeu_si128(out2, hi);
503 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
504 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
505 if (!_mm_testz_si128(found1, found1) ||
506 !_mm_testz_si128(found2, found2)) {
514 current_frame->len += (uint8_t *)in - aligned_start;
516 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
517 while (in < (const __m128i *)limit) {
518 __m128i data = _mm_load_si128(in);
519 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
520 __m128i found = _mm_cmpeq_epi8(data, needle);
521 if (!_mm_testz_si128(found, found)) {
528 current_frame->len = (uint8_t *)out - current_frame->data;
532 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Hand the caller the position where vector processing stopped.
534 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its bytes
// to current_frame, and invokes start_callback whenever sync_pattern is found.
//   xfr:             completed isochronous transfer.
//   sync_pattern:    byte sequence marking the start of a new frame/block
//                    (its length is passed as sync_length).
//   current_frame:   frame currently being filled.
//   frame_type_name: label passed on to add_to_frame() for log messages.
//   start_callback:  called with a pointer just past each sync pattern found.
538 void decode_packs(const libusb_transfer *xfr,
539 const char *sync_pattern,
541 FrameAllocator::Frame *current_frame,
542 const char *frame_type_name,
543 function<void(const uint8_t *start)> start_callback)
546 for (int i = 0; i < xfr->num_iso_packets; i++) {
547 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete are reported on stderr.
549 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
550 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet's slot contain received data.
555 const uint8_t *start = xfr->buffer + offset;
556 const uint8_t *limit = start + pack->actual_length;
557 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy up to the first possible sync byte; the return value is where
// the careful search below has to take over.
559 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
560 if (start == limit) break;
561 assert(start < limit);
564 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
565 if (start_next_frame == nullptr) {
566 // add the rest of the buffer
567 add_to_frame(current_frame, frame_type_name, start, limit);
// Sync found: bytes before it finish the current frame, and the bytes
// after it belong to the next one.
570 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
571 start = start_next_frame + sync_length; // skip sync
572 start_callback(start);
576 dump_pack(xfr, offset, pack);
// Advance by the packet's slot size (length), not by the bytes received.
578 offset += pack->length;
// libusb completion callback shared by the isochronous (audio/video) and
// control (register read) transfers. Processes the received data and then
// resubmits the same transfer.
582 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is logged and freed.
584 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
585 fprintf(stderr, "transfer status %d\n", xfr->status);
586 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture instance (set in configure_card()).
590 assert(xfr->user_data != nullptr);
591 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
593 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio, using a 20-byte text sync marker; the
// other call decodes video, using the 00 00 ff ff marker.
594 if (xfr->endpoint == 0x84) {
595 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
597 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
600 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
601 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
602 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is used below although its declaration above is
// commented out; presumably these lines are compiled out — confirm.
604 if (setup->wIndex == 44) {
605 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
607 printf("read register %2d: 0x%02x%02x%02x%02x\n",
608 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes read and step to the next register, wrapping at
// NUM_BMUSB_REGISTERS.
611 memcpy(usb->register_file + usb->current_register, buf, 4);
612 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
613 if (usb->current_register == 0) {
614 // read through all of them
615 printf("register dump:");
616 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
617 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the setup packet so the resubmitted transfer reads the next register.
621 libusb_fill_control_setup(xfr->buffer,
622 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
623 /*index=*/usb->current_register, /*length=*/4);
// Raw hex dump of the transfer buffer.
628 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
629 for (i = 0; i < xfr->actual_length; i++) {
630 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so data keeps flowing.
640 int rc = libusb_submit_transfer(xfr);
642 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
647 void BMUSBCapture::usb_thread_func()
650 memset(¶m, 0, sizeof(param));
651 param.sched_priority = 1;
652 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
653 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
655 while (!should_quit) {
656 int rc = libusb_handle_events(nullptr);
657 if (rc != LIBUSB_SUCCESS)
// One supported capture device found during enumeration in open_card().
// Besides the device pointer below, open_card() also fills and reads
// product, bus and port members.
662 struct USBCardDevice {
// libusb device; open_card() unrefs it once the chosen card has been opened.
665 libusb_device *device;
// Enumerates USB devices, keeps the supported capture cards, sorts them into
// a stable order, lists them on stderr and opens the one at card_index.
// Returns the opened device handle.
668 libusb_device_handle *open_card(int card_index)
670 libusb_device **devices;
671 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
672 if (num_devices == -1) {
673 fprintf(stderr, "Error finding USB devices\n");
676 vector<USBCardDevice> found_cards;
677 for (ssize_t i = 0; i < num_devices; ++i) {
678 libusb_device_descriptor desc;
679 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
680 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
684 uint8_t bus = libusb_get_bus_number(devices[i]);
685 uint8_t port = libusb_get_port_number(devices[i]);
// Only vendor 0x1edb with product 0xbd3b or 0xbd4f is kept; every other
// device has its reference dropped right away.
687 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
688 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
689 libusb_unref_device(devices[i]);
693 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Free only the list itself (unref_devices = 0); references to the kept
// devices are dropped individually further down.
695 libusb_free_device_list(devices, 0);
697 // Sort the devices to get a consistent ordering.
698 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
// Order by product, then bus, then port.
699 if (a.product != b.product)
700 return a.product < b.product;
702 return a.bus < b.bus;
703 return a.port < b.port;
// NOTE(review): the "Device" column below actually prints the port number.
706 for (size_t i = 0; i < found_cards.size(); ++i) {
707 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
708 if (found_cards[i].product == 0xbd3b) {
709 fprintf(stderr, "Intensity Shuttle\n");
710 } else if (found_cards[i].product == 0xbd4f) {
711 fprintf(stderr, "UltraStudio SDI\n");
// The cast to size_t makes a negative card_index fail this range check too.
717 if (size_t(card_index) >= found_cards.size()) {
718 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
722 libusb_device_handle *devh;
723 int rc = libusb_open(found_cards[card_index].device, &devh);
725 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the enumeration references on all kept devices.
729 for (size_t i = 0; i < found_cards.size(); ++i) {
730 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card: installs default frame allocators,
// starts the dequeue thread, initializes libusb, opens and configures the
// device, issues the initial control requests, and prepares (but does not
// submit) the isochronous transfers stored in iso_xfrs.
736 void BMUSBCapture::configure_card()
// Fall back to malloc-backed allocators when the user has not supplied any.
738 if (video_frame_allocator == nullptr) {
739 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
741 if (audio_frame_allocator == nullptr) {
742 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// Start the thread that pairs up queued frames and delivers them.
744 dequeue_thread_should_quit = false;
745 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
748 struct libusb_transfer *xfr;
750 rc = libusb_init(nullptr);
752 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
756 libusb_device_handle *devh = open_card(card_index);
758 fprintf(stderr, "Error finding USB device\n");
// Print the interface / alternate setting / endpoint layout of configuration 0.
762 libusb_config_descriptor *config;
763 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
765 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
768 printf("%d interface\n", config->bNumInterfaces);
769 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
770 printf(" interface %d\n", interface_number);
771 const libusb_interface *interface = &config->interface[interface_number];
772 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
773 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
774 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
775 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
776 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
777 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
782 rc = libusb_set_configuration(devh, /*configuration=*/1);
784 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
788 rc = libusb_claim_interface(devh, 0);
790 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
794 // Alternate setting 1 is output, alternate setting 2 is input.
795 // Card is reset when switching alternates, so the driver uses
796 // this “double switch” when it wants to reset.
798 // There's also alternate settings 3 and 4, which seem to be
799 // like 1 and 2 except they advertise less bandwidth needed.
800 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
802 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
805 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
807 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
811 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
813 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
819 rc = libusb_claim_interface(devh, 3);
821 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
827 // 44 is some kind of timer register (first 16 bits count upwards)
828 // 24 is some sort of watchdog?
829 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
830 // (or will go to 0x73c60010?), also seen 0x73c60100
831 // 12 also changes all the time, unclear why
832 // 16 seems to be autodetected mode somehow
833 // -- this is e00115e0 after reset?
834 // ed0115e0 after mode change [to output?]
835 // 2d0015e0 after more mode change [to input]
836 // ed0115e0 after more mode change
837 // 2d0015e0 after more mode change
839 // 390115e0 seems to indicate we have signal
840 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
842 // 200015e0 on startup
843 // changes to 250115e0 when we sync to the signal
845 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
847 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
849 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
850 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
852 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
853 // perhaps some of them are related to analog output?
855 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
856 // but the driver sets it to 0x8036802a at some point.
858 // all of this is on request 214/215. other requests (192, 219,
859 // 222, 223, 224) are used for firmware upgrade. Probably best to
860 // stay out of it unless you know what you're doing.
864 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
867 // 0x01 - stable signal
869 // 0x08 - unknown (audio??)
// Initial control requests, issued in order; each entry is
// { direction, request, register index, data }.
879 static const ctrl ctrls[] = {
880 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
881 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
883 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
885 // clearing the 0x08000000 bit seems to change the capture format (other source?)
886 // 0x10000000 = analog audio instead of embedded audio, it seems
887 // 0x3a000000 = component video? (analog audio)
888 // 0x3c000000 = composite video? (analog audio)
889 // 0x3e000000 = s-video? (analog audio)
890 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
891 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
892 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
893 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
894 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
897 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word goes out in network (big-endian) byte order.
898 uint32_t flipped = htonl(ctrls[req].data);
899 static uint8_t value[4];
900 memcpy(value, &flipped, sizeof(flipped));
901 int size = sizeof(value);
902 //if (ctrls[req].request == 215) size = 0;
// Synchronous vendor request; timeout 0 means wait indefinitely.
903 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
904 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
906 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// On success rc is the number of bytes transferred; print that many.
910 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
911 for (int i = 0; i < rc; ++i) {
912 printf("%02x", value[i]);
// Synchronous read of the register at my_index, printed as hex.
920 static int my_index = 0;
921 static uint8_t value[4];
922 int size = sizeof(value);
923 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
924 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
926 fprintf(stderr, "Error on control\n");
929 printf("rc=%d index=%d: 0x", rc, my_index);
930 for (int i = 0; i < rc; ++i) {
931 printf("%02x", value[i]);
938 // set up an asynchronous transfer of the timer register
// The buffers are static because libusb keeps using them after this
// function returns. user_data is overwritten with `this` right after the
// fill call, since cb_xfr expects the BMUSBCapture instance there.
939 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
940 static int completed = 0;
942 xfr = libusb_alloc_transfer(0);
943 libusb_fill_control_setup(cmdbuf,
944 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
945 /*index=*/44, /*length=*/4);
946 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
947 xfr->user_data = this;
948 libusb_submit_transfer(xfr);
950 // set up an asynchronous transfer of register 24
951 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
952 static int completed2 = 0;
954 xfr = libusb_alloc_transfer(0);
955 libusb_fill_control_setup(cmdbuf2,
956 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
957 /*index=*/24, /*length=*/4);
958 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
959 xfr->user_data = this;
960 libusb_submit_transfer(xfr);
963 // set up an asynchronous transfer of the register dump
964 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
965 static int completed3 = 0;
967 xfr = libusb_alloc_transfer(0);
968 libusb_fill_control_setup(cmdbuf3,
969 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
970 /*index=*/current_register, /*length=*/4);
971 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
972 xfr->user_data = this;
// The register-dump transfer is prepared but intentionally not submitted.
973 //libusb_submit_transfer(xfr);
// Output file used by dump_audio_block().
// NOTE(review): the fopen() result is not checked here.
975 audiofp = fopen("audio.raw", "wb");
977 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4 (combined with LIBUSB_ENDPOINT_IN below), with
// ten transfers prepared per endpoint.
978 for (int e = 3; e <= 4; ++e) {
979 //int num_transfers = (e == 3) ? 6 : 6;
980 int num_transfers = 10;
981 for (int i = 0; i < num_transfers; ++i) {
982 int num_iso_pack, size;
984 // Video seems to require isochronous packets scaled with the width;
985 // seemingly six lines is about right, rounded up to the required 1kB
987 size = WIDTH * 2 * 6;
988 // Note that for 10-bit input, you'll need to increase size accordingly.
989 //size = size * 4 / 3;
990 if (size % 1024 != 0) {
994 num_iso_pack = (2 << 16) / size; // 128 kB.
995 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer holds all packets of this transfer.
1000 int num_bytes = num_iso_pack * size;
1001 uint8_t *buf = new uint8_t[num_bytes];
1003 xfr = libusb_alloc_transfer(num_iso_pack);
1005 fprintf(stderr, "oom\n");
1009 int ep = LIBUSB_ENDPOINT_IN | e;
1010 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
1011 num_iso_pack, cb_xfr, nullptr, 0);
1012 libusb_set_iso_packet_lengths(xfr, size);
1013 xfr->user_data = this;
// Kept for later submission by start_bm_capture().
1014 iso_xfrs.push_back(xfr);
// Starts capturing by submitting every isochronous transfer that
// configure_card() stored in iso_xfrs.
1019 void BMUSBCapture::start_bm_capture()
1021 printf("starting capture\n");
1023 for (libusb_transfer *xfr : iso_xfrs) {
1024 printf("submitting transfer...\n");
1025 int rc = libusb_submit_transfer(xfr);
1028 //printf("num_bytes=%d\n", num_bytes);
// NOTE(review): `i` is not declared anywhere in the visible lines of this
// function — confirm where it comes from.
1029 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1030 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): `devh` is likewise not declared in the visible lines;
// presumably this teardown code is compiled out or unreachable — confirm.
1037 libusb_release_interface(devh, 0);
1041 libusb_exit(nullptr);
// Asks the dequeue thread to exit and waits for it to finish.
1046 void BMUSBCapture::stop_dequeue_thread()
1048 dequeue_thread_should_quit = true;
// Wake the thread in case it is blocked waiting for frames; its wait
// predicate also checks the quit flag.
1049 queues_not_empty.notify_all();
1050 dequeue_thread.join();
// Clears the stop flag and launches the USB event thread.
1053 void BMUSBCapture::start_bm_thread()
1055 should_quit = false;
// No object pointer is bound here, so usb_thread_func is invoked without an
// instance.
1056 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1059 void BMUSBCapture::stop_bm_thread()