1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Compile-time capture parameters and file-level globals.
// NOTE(review): WIDTH is referenced by the commented-out FRAME_SIZE formulas
// and by configure_card(); confirm where it is defined.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Offset passed to frame_callback() for video frames, and the number of bytes
// dump_frame() skips at the start of a frame.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Offset passed to frame_callback() for audio frames, and the number of bytes
// dump_audio_block() skips at the start of a block.
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed video buffer size (8 << 20 bytes) used for the default video frame
// allocator in configure_card(), instead of the per-format formulas above.
46 #define FRAME_SIZE (8 << 20)
// Checked by usb_thread_func() on every iteration of its event loop.
51 atomic<bool> should_quit;
// Empty out-of-line destructor for the FrameAllocator interface.
53 FrameAllocator::~FrameAllocator() {}
55 // Audio is more important than video, and also much cheaper.
56 // By having many more audio frames available, hopefully if something
57 // starts to drop, we'll have CPU load go down (from not having to
58 // process as much video) before we have to drop audio.
// Pool sizes handed to the default MallocFrameAllocator instances.
59 #define NUM_QUEUED_VIDEO_FRAMES 16
60 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator backed by a fixed pool of heap buffers. The constructor
// preallocates num_queued_frames buffers of frame_size bytes each;
// alloc_frame() takes a buffer from the freelist and release_frame()
// returns it.
62 class MallocFrameAllocator : public FrameAllocator {
64 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
65 Frame alloc_frame() override;
66 void release_frame(Frame frame) override;
// Buffers currently available for alloc_frame(); accessed under
// freelist_mutex by alloc_frame() and release_frame().
72 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Records the per-buffer size and fills the freelist with num_queued_frames
// freshly allocated buffers of frame_size bytes each.
75 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
76 : frame_size(frame_size)
78 for (size_t i = 0; i < num_queued_frames; ++i) {
79 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist. When the freelist is empty a
// "Frame overrun" message is printed instead; callers in this file detect
// that case by checking the returned frame's data pointer for nullptr.
83 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
// The freelist is shared with release_frame(), so it is locked here.
88 unique_lock<mutex> lock(freelist_mutex); // Meh.
89 if (freelist.empty()) {
90 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership of the raw buffer moves out of the unique_ptr into the frame;
// release_frame() wraps it in a unique_ptr again.
93 vf.data = freelist.top().release();
95 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist, after logging any overflow byte
// count recorded on the frame.
// NOTE(review): frame.data is pushed without a nullptr check here; confirm
// that callers never release a frame whose allocation failed.
100 void MallocFrameAllocator::release_frame(Frame frame)
102 if (frame.overflow > 0) {
103 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
105 unique_lock<mutex> lock(freelist_mutex);
106 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true iff timecode a comes strictly before timecode b on the 16-bit
// wraparound circle, i.e. b is between 1 and 0x7fff steps ahead of a
// (mod 0x10000).
//
// Equal values compare false in both directions; dequeue_thread_func() relies
// on that to recognize a matching video/audio timecode pair.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	} else if (a < b) {
		// No wraparound between a and b: b is ahead by (b - a).
		return (b - a < 0x8000);
	} else {
		// b is numerically smaller, so it can only be "after" a if the
		// counter wrapped; measure the distance on the unwrapped axis.
		int wrap_b = 0x10000 + int(b);
		return (wrap_b - a < 0x8000);
	}
}
// Appends a finished frame, tagged with its format and timecode, to the
// pending queue *q and signals queues_not_empty.
//
// If the queue already has entries and its last timecode is not strictly
// before <timecode> (per uint16_less_than_with_wraparound), a "Blocks going
// backwards" message is printed and the frame is handed back to its owning
// allocator.
121 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
123 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
124 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
125 q->back().timecode, timecode);
126 frame.owner->release_frame(frame);
132 qf.timecode = timecode;
// The queues are shared with dequeue_thread_func(), which locks the same mutex.
136 unique_lock<mutex> lock(queue_lock);
137 q->push_back(move(qf));
139 queues_not_empty.notify_one(); // might be spurious
142 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
144 FILE *fp = fopen(filename, "wb");
145 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
146 printf("short write!\n");
// Debug helper: appends the payload of one audio block (everything after the
// first AUDIO_HEADER_SIZE bytes) to the file-level stream audiofp.
// NOTE(review): audiofp is used without a null check and the fwrite() result
// is ignored; confirm that is acceptable for a debug dump.
151 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
153 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up pending video and audio frames by
// timecode and hands them to frame_callback.
//
// Runs dequeue_init_callback first and dequeue_cleanup_callback last when
// has_dequeue_callbacks is set. In between it loops until
// dequeue_thread_should_quit is set, each time waiting until both queues are
// non-empty (or quit was requested) and then comparing the timecodes at the
// front of the two queues.
// NOTE(review): confirm whether queue_lock is still held while frame_callback
// runs; the callback could otherwise block queue_frame().
156 void BMUSBCapture::dequeue_thread_func()
158 if (has_dequeue_callbacks) {
159 dequeue_init_callback();
161 while (!dequeue_thread_should_quit) {
162 unique_lock<mutex> lock(queue_lock);
163 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
165 uint16_t video_timecode = pending_video_frames.front().timecode;
166 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Video is older than the oldest audio: release the video frame and drop it.
167 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
168 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
170 video_frame_allocator->release_frame(pending_video_frames.front().frame);
171 pending_video_frames.pop_front();
// Audio is older than the oldest video: deliver the audio together with a
// default-constructed (blank) video frame.
172 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
173 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
175 QueuedFrame audio_frame = pending_audio_frames.front();
176 pending_audio_frames.pop_front();
178 frame_callback(audio_timecode,
179 FrameAllocator::Frame(), 0, 0x0000,
180 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Neither timecode precedes the other: treat them as a matching pair.
182 QueuedFrame video_frame = pending_video_frames.front();
183 QueuedFrame audio_frame = pending_audio_frames.front();
184 pending_audio_frames.pop_front();
185 pending_video_frames.pop_front();
190 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
191 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
192 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
195 frame_callback(video_timecode,
196 video_frame.frame, HEADER_SIZE, video_frame.format,
197 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
200 if (has_dequeue_callbacks) {
201 dequeue_cleanup_callback();
// Called by decode_packs() with a pointer just past a video sync marker.
// Reads the 16-bit timecode (bytes 0-1) and format (bytes 2-3), both
// little-endian, queues the video frame collected so far if it has any data,
// and then allocates a fresh current_video_frame.
205 void BMUSBCapture::start_new_frame(const uint8_t *start)
207 uint16_t format = (start[3] << 8) | start[2];
208 uint16_t timecode = (start[1] << 8) | start[0];
210 if (current_video_frame.len > 0) {
211 // If format is 0x0800 (no signal), add a fake (empty) audio
212 // frame to get it out of the queue.
213 // TODO: Figure out if there are other formats that come with
214 // no audio, and treat them the same.
215 if (format == 0x0800) {
216 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
217 if (fake_audio_frame.data == nullptr) {
218 // Oh well, it's just a no-signal frame anyway.
219 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
220 current_video_frame.owner->release_frame(current_video_frame);
221 current_video_frame = video_frame_allocator->alloc_frame();
224 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
227 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
229 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
231 // //start[7], start[6], start[5], start[4],
232 // read_current_frame, FRAME_SIZE);
234 current_video_frame = video_frame_allocator->alloc_frame();
235 //if (current_video_frame.data == nullptr) {
236 // read_current_frame = -1;
238 // read_current_frame = 0;
// Called by decode_packs() with a pointer just past an audio sync marker.
// Reads the 16-bit timecode (bytes 0-1) and format (bytes 2-3), both
// little-endian, queues the audio block collected so far if it has any data,
// and then allocates a fresh current_audio_frame.
242 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
244 uint16_t format = (start[3] << 8) | start[2];
245 uint16_t timecode = (start[1] << 8) | start[0];
246 if (current_audio_frame.len > 0) {
247 //dump_audio_block();
248 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
250 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
251 // format, timecode, read_current_audio_block);
252 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
// The packet's data is the pack->actual_length bytes starting at <offset>
// within xfr->buffer.
256 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
258 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
259 for (unsigned j = 0; j < pack->actual_length; j++) {
260 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
261 printf("%02x", xfr->buffer[j + offset]);
// Grouping: a different separator is emitted after every 8th byte.
264 else if ((j % 8) == 7)
// Splits n bytes from src across the two destinations dest1 and dest2,
// consuming the source two bytes per loop iteration.
// NOTE(review): presumably even-indexed bytes go to dest1 and odd-indexed
// bytes to dest2 (matching the SIMD de-interleave in add_to_frame_fastpath),
// and n is expected to be even; confirm against the loop body.
272 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
275 uint8_t *dptr1 = dest1;
276 uint8_t *dptr2 = dest2;
278 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to *current_frame (the slow, general path).
//
//   current_frame   - frame being filled; len is advanced by this call.
//   frame_type_name - label used only in the overflow log message.
//   start, end      - source byte range.
//
// If the bytes do not fit, the excess is recorded in overflow and len is
// clamped to size. For interleaved frames the data is split between data and
// data2; otherwise it is copied contiguously to data + len.
284 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard against frames with no buffer or an inconsistent length.
286 if (current_frame->data == nullptr ||
287 current_frame->len > current_frame->size ||
292 int bytes = end - start;
293 if (current_frame->len + bytes > current_frame->size) {
294 current_frame->overflow = current_frame->len + bytes - current_frame->size;
295 current_frame->len = current_frame->size;
// Only overflows above 1 MiB are logged here, and the counter is then reset.
296 if (current_frame->overflow > 1048576) {
297 printf("%d bytes overflow after last %s frame\n",
298 int(current_frame->overflow), frame_type_name);
299 current_frame->overflow = 0;
303 if (current_frame->interleaved) {
// Each output plane receives half of the bytes written so far.
304 uint8_t *data = current_frame->data + current_frame->len / 2;
305 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// An odd current length and an odd byte count are special-cased before the
// bulk two-bytes-at-a-time copy below.
306 if (current_frame->len % 2 == 1) {
310 if (bytes % 2 == 1) {
313 ++current_frame->len;
316 memcpy_interleaved(data, data2, start, bytes);
317 current_frame->len += bytes;
319 memcpy(current_frame->data + current_frame->len, start, bytes);
320 current_frame->len += bytes;
// Debug helper: prints <name> left-justified in a 10-character column,
// followed by the 32 bytes of the AVX2 register n as two-digit hex values.
// Each byte is extracted with its own call and a literal index.
328 void avx2_dump(const char *name, __m256i n)
330 printf("%-10s:", name);
331 printf(" %02x", _mm256_extract_epi8(n, 0));
332 printf(" %02x", _mm256_extract_epi8(n, 1));
333 printf(" %02x", _mm256_extract_epi8(n, 2));
334 printf(" %02x", _mm256_extract_epi8(n, 3));
335 printf(" %02x", _mm256_extract_epi8(n, 4));
336 printf(" %02x", _mm256_extract_epi8(n, 5));
337 printf(" %02x", _mm256_extract_epi8(n, 6));
338 printf(" %02x", _mm256_extract_epi8(n, 7));
340 printf(" %02x", _mm256_extract_epi8(n, 8));
341 printf(" %02x", _mm256_extract_epi8(n, 9));
342 printf(" %02x", _mm256_extract_epi8(n, 10));
343 printf(" %02x", _mm256_extract_epi8(n, 11));
344 printf(" %02x", _mm256_extract_epi8(n, 12));
345 printf(" %02x", _mm256_extract_epi8(n, 13));
346 printf(" %02x", _mm256_extract_epi8(n, 14));
347 printf(" %02x", _mm256_extract_epi8(n, 15));
349 printf(" %02x", _mm256_extract_epi8(n, 16));
350 printf(" %02x", _mm256_extract_epi8(n, 17));
351 printf(" %02x", _mm256_extract_epi8(n, 18));
352 printf(" %02x", _mm256_extract_epi8(n, 19));
353 printf(" %02x", _mm256_extract_epi8(n, 20));
354 printf(" %02x", _mm256_extract_epi8(n, 21));
355 printf(" %02x", _mm256_extract_epi8(n, 22));
356 printf(" %02x", _mm256_extract_epi8(n, 23));
358 printf(" %02x", _mm256_extract_epi8(n, 24));
359 printf(" %02x", _mm256_extract_epi8(n, 25));
360 printf(" %02x", _mm256_extract_epi8(n, 26));
361 printf(" %02x", _mm256_extract_epi8(n, 27));
362 printf(" %02x", _mm256_extract_epi8(n, 28));
363 printf(" %02x", _mm256_extract_epi8(n, 29));
364 printf(" %02x", _mm256_extract_epi8(n, 30));
365 printf(" %02x", _mm256_extract_epi8(n, 31));
370 // Does a memcpy and memchr in one to reduce processing time.
371 // Note that the benefit is somewhat limited if your L3 cache is small,
372 // as you'll (unfortunately) spend most of the time loading the data
375 // Complicated cases are left to the slow path; it basically stops copying
376 // up until the first instance of "sync_char" (usually a bit before, actually).
377 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
378 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
//   current_frame - frame being filled; len is advanced by the bytes copied.
//   start, limit  - source byte range offered to the fast path.
//   sync_char     - first byte of the sync pattern to stop before.
//
// The return value is a pointer into the source range; decode_packs() resumes
// its slow-path processing from there.
379 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
381 if (current_frame->data == nullptr ||
382 current_frame->len > current_frame->size ||
// Inputs below 128 bytes are special-cased before any SIMD work.
386 size_t orig_bytes = limit - start;
387 if (orig_bytes < 128) {
392 // Don't read more bytes than we can write.
393 limit = min(limit, start + (current_frame->size - current_frame->len));
395 // Align end to 32 bytes.
396 limit = (const uint8_t *)(intptr_t(limit) & ~31);
398 if (start >= limit) {
402 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
403 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
404 if (aligned_start != start) {
// The unaligned head is handled by the slow path; if it already contains
// sync_char, only the bytes before it are added.
405 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
406 if (sync_start == nullptr) {
407 add_to_frame(current_frame, "", start, aligned_start);
409 add_to_frame(current_frame, "", start, sync_start);
414 // Make the length a multiple of 64.
415 if (current_frame->interleaved) {
416 if (((limit - aligned_start) % 64) != 0) {
419 assert(((limit - aligned_start) % 64) == 0);
// AVX2 variant: 32-byte vectors.
423 const __m256i needle = _mm256_set1_epi8(sync_char);
425 const __restrict __m256i *in = (const __m256i *)aligned_start;
426 if (current_frame->interleaved) {
427 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
428 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
429 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-indexed bytes to the low half, odd-indexed
// bytes to the high half.
433 __m256i shuffle_cw = _mm256_set_epi8(
434 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
435 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
436 while (in < (const __m256i *)limit) {
437 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
438 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
439 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
441 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
442 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
443 __m256i found = _mm256_or_si256(found1, found2);
445 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
446 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
448 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
449 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
451 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
452 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
454 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
455 _mm256_storeu_si256(out2, hi);
// Any byte equal to sync_char in this 64-byte chunk ends the fast path.
457 if (!_mm256_testz_si256(found, found)) {
465 current_frame->len += (uint8_t *)in - aligned_start;
467 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
468 while (in < (const __m256i *)limit) {
469 __m256i data = _mm256_load_si256(in);
470 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
471 __m256i found = _mm256_cmpeq_epi8(data, needle);
472 if (!_mm256_testz_si256(found, found)) {
479 current_frame->len = (uint8_t *)out - current_frame->data;
// SSE variant of the same algorithm: 16-byte vectors.
482 const __m128i needle = _mm_set1_epi8(sync_char);
484 const __m128i *in = (const __m128i *)aligned_start;
485 if (current_frame->interleaved) {
486 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
487 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
488 if (current_frame->len % 2 == 1) {
492 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
493 while (in < (const __m128i *)limit) {
494 __m128i data1 = _mm_load_si128(in);
495 __m128i data2 = _mm_load_si128(in + 1);
// The low byte of each 16-bit pair goes to out1, the high byte to out2.
496 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
497 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
498 __m128i data1_hi = _mm_srli_epi16(data1, 8);
499 __m128i data2_hi = _mm_srli_epi16(data2, 8);
500 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
501 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
502 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
503 _mm_storeu_si128(out2, hi);
504 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
505 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
506 if (!_mm_testz_si128(found1, found1) ||
507 !_mm_testz_si128(found2, found2)) {
515 current_frame->len += (uint8_t *)in - aligned_start;
517 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
518 while (in < (const __m128i *)limit) {
519 __m128i data = _mm_load_si128(in);
520 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
521 __m128i found = _mm_cmpeq_epi8(data, needle);
522 if (!_mm_testz_si128(found, found)) {
529 current_frame->len = (uint8_t *)out - current_frame->data;
533 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
535 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload
// to *current_frame and splits the stream at each occurrence of the sync
// pattern.
//
//   xfr             - completed isochronous transfer.
//   sync_pattern    - byte sequence that marks the start of a new frame/block;
//                     its length is passed separately as sync_length.
//   current_frame   - frame currently being filled.
//   frame_type_name - label used in log messages ("audio" / "video").
//   start_callback  - invoked with a pointer just past each sync pattern found.
539 void decode_packs(const libusb_transfer *xfr,
540 const char *sync_pattern,
542 FrameAllocator::Frame *current_frame,
543 const char *frame_type_name,
544 function<void(const uint8_t *start)> start_callback)
547 for (int i = 0; i < xfr->num_iso_packets; i++) {
548 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
550 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
551 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of each packet slot hold received data.
556 const uint8_t *start = xfr->buffer + offset;
557 const uint8_t *limit = start + pack->actual_length;
558 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path stops at or before the first
// byte equal to sync_pattern[0].
560 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
561 if (start == limit) break;
562 assert(start < limit);
// Slow path: search the remainder for the full sync pattern.
565 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
566 if (start_next_frame == nullptr) {
567 // add the rest of the buffer
568 add_to_frame(current_frame, frame_type_name, start, limit);
571 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
572 start = start_next_frame + sync_length; // skip sync
573 start_callback(start);
577 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer at their nominal length, not actual_length.
579 offset += pack->length;
// libusb transfer-completion callback, shared by the isochronous and control
// transfers. xfr->user_data must hold the owning BMUSBCapture instance.
//
// Isochronous transfers are decoded with decode_packs(): endpoint 0x84 uses
// the audio sync pattern and the audio frame, the other branch uses the video
// sync pattern and the video frame. Control transfers carry a 4-byte register
// read that is stored in register_file. The transfer is resubmitted at the end.
583 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
585 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
586 fprintf(stderr, "transfer status %d\n", xfr->status);
587 libusb_free_transfer(xfr);
591 assert(xfr->user_data != nullptr);
592 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
594 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
595 if (xfr->endpoint == 0x84) {
596 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
598 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
601 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
602 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
603 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is used below although its declaration above is
// commented out; presumably these lines are compiled out — confirm.
605 if (setup->wIndex == 44) {
606 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
608 printf("read register %2d: 0x%02x%02x%02x%02x\n",
609 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the 4 bytes just read and advance to the next register, wrapping at
// NUM_BMUSB_REGISTERS.
612 memcpy(usb->register_file + usb->current_register, buf, 4);
613 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
614 if (usb->current_register == 0) {
615 // read through all of them
616 printf("register dump:");
617 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
618 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-aim the same transfer at the next register before it is resubmitted.
622 libusb_fill_control_setup(xfr->buffer,
623 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
624 /*index=*/usb->current_register, /*length=*/4);
629 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
630 for (i = 0; i < xfr->actual_length; i++) {
631 printf("%02x", xfr->buffer[i]);
641 int rc = libusb_submit_transfer(xfr);
643 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
648 void BMUSBCapture::usb_thread_func()
651 memset(¶m, 0, sizeof(param));
652 param.sched_priority = 1;
653 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
654 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
656 while (!should_quit) {
657 int rc = libusb_handle_events(nullptr);
658 if (rc != LIBUSB_SUCCESS)
// One supported capture card found during enumeration in open_card(),
// which fills it with the product ID, bus number, port number and device.
663 struct USBCardDevice {
// libusb device reference; open_card() unrefs it after opening the chosen card.
666 libusb_device *device;
// Enumerates all USB devices, keeps those with vendor ID 0x1edb and product ID
// 0xbd3b or 0xbd4f, sorts them into a stable order, lists them on stderr, and
// opens the one at position card_index in that order.
669 libusb_device_handle *open_card(int card_index)
671 libusb_device **devices;
672 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
673 if (num_devices == -1) {
674 fprintf(stderr, "Error finding USB devices\n");
677 vector<USBCardDevice> found_cards;
678 for (ssize_t i = 0; i < num_devices; ++i) {
679 libusb_device_descriptor desc;
680 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
681 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
685 uint8_t bus = libusb_get_bus_number(devices[i]);
686 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are unreffed here,
// since the list is freed below without unreffing its devices.
688 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
689 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
690 libusb_unref_device(devices[i]);
694 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: keep the device references that found_cards still holds.
696 libusb_free_device_list(devices, 0);
698 // Sort the devices to get a consistent ordering.
699 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
700 if (a.product != b.product)
701 return a.product < b.product;
703 return a.bus < b.bus;
704 return a.port < b.port;
707 for (size_t i = 0; i < found_cards.size(); ++i) {
// NOTE(review): the value printed after "Device" is the port number
// (found_cards[i].port), not a device address — confirm the label.
708 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
709 if (found_cards[i].product == 0xbd3b) {
710 fprintf(stderr, "Intensity Shuttle\n");
711 } else if (found_cards[i].product == 0xbd4f) {
712 fprintf(stderr, "UltraStudio SDI\n");
// The size_t cast makes a negative card_index fail this range check too.
718 if (size_t(card_index) >= found_cards.size()) {
719 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
723 libusb_device_handle *devh;
724 int rc = libusb_open(found_cards[card_index].device, &devh);
726 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references taken during enumeration for every listed card.
730 for (size_t i = 0; i < found_cards.size(); ++i) {
731 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card. In order, it:
//   - installs default MallocFrameAllocator instances if none were set,
//   - starts the dequeue thread,
//   - initializes libusb and opens the card selected by card_index,
//   - prints the interface/altsetting/endpoint layout of configuration 0,
//   - selects configuration 1, claims interfaces and switches alternate
//     settings,
//   - issues the vendor control requests listed in ctrls[],
//   - prepares asynchronous control transfers for register reads,
//   - allocates the isochronous transfers for endpoints 3 and 4 and stores
//     them in iso_xfrs; they are submitted later by start_bm_capture().
737 void BMUSBCapture::configure_card()
739 if (video_frame_allocator == nullptr) {
740 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
742 if (audio_frame_allocator == nullptr) {
743 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
745 dequeue_thread_should_quit = false;
746 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
749 struct libusb_transfer *xfr;
751 rc = libusb_init(nullptr);
753 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
757 libusb_device_handle *devh = open_card(card_index);
759 fprintf(stderr, "Error finding USB device\n");
763 libusb_config_descriptor *config;
764 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
766 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of the descriptor tree; nothing below depends on it.
769 printf("%d interface\n", config->bNumInterfaces);
770 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
771 printf(" interface %d\n", interface_number);
772 const libusb_interface *interface = &config->interface[interface_number];
773 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
774 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
775 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
776 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
777 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
778 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
783 rc = libusb_set_configuration(devh, /*configuration=*/1);
785 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
789 rc = libusb_claim_interface(devh, 0);
791 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
795 // Alternate setting 1 is output, alternate setting 2 is input.
796 // Card is reset when switching alternates, so the driver uses
797 // this “double switch” when it wants to reset.
799 // There's also alternate settings 3 and 4, which seem to be
800 // like 1 and 2 except they advertise less bandwidth needed.
801 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
803 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
806 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
808 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
812 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
814 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
820 rc = libusb_claim_interface(devh, 3);
822 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
828 // 44 is some kind of timer register (first 16 bits count upwards)
829 // 24 is some sort of watchdog?
830 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
831 // (or will go to 0x73c60010?), also seen 0x73c60100
832 // 12 also changes all the time, unclear why
833 // 16 seems to be autodetected mode somehow
834 // -- this is e00115e0 after reset?
835 // ed0115e0 after mode change [to output?]
836 // 2d0015e0 after more mode change [to input]
837 // ed0115e0 after more mode change
838 // 2d0015e0 after more mode change
840 // 390115e0 seems to indicate we have signal
841 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
843 // 200015e0 on startup
844 // changes to 250115e0 when we sync to the signal
846 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
848 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
850 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
851 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
853 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
854 // perhaps some of them are related to analog output?
856 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
857 // but the driver sets it to 0x8036802a at some point.
859 // all of this is on request 214/215. other requests (192, 219,
860 // 222, 223, 224) are used for firmware upgrade. Probably best to
861 // stay out of it unless you know what you're doing.
865 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
868 // 0x01 - stable signal
870 // 0x08 - unknown (audio??)
// Each entry is { direction, request, index, data }: request 214 reads a
// register and request 215 writes one (see the notes above).
880 static const ctrl ctrls[] = {
881 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
882 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
884 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
886 // clearing the 0x08000000 bit seems to change the capture format (other source?)
887 // 0x10000000 = analog audio instead of embedded audio, it seems
888 // 0x3a000000 = component video? (analog audio)
889 // 0x3c000000 = composite video? (analog audio)
890 // 0x3e000000 = s-video? (analog audio)
891 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
892 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
893 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
894 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
895 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
898 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// Register values are sent in network (big-endian) byte order.
899 uint32_t flipped = htonl(ctrls[req].data);
900 static uint8_t value[4];
901 memcpy(value, &flipped, sizeof(flipped));
902 int size = sizeof(value);
903 //if (ctrls[req].request == 215) size = 0;
904 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
905 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
907 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
911 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
912 for (int i = 0; i < rc; ++i) {
913 printf("%02x", value[i]);
921 static int my_index = 0;
922 static uint8_t value[4];
923 int size = sizeof(value);
924 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
925 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
927 fprintf(stderr, "Error on control\n");
930 printf("rc=%d index=%d: 0x", rc, my_index);
931 for (int i = 0; i < rc; ++i) {
932 printf("%02x", value[i]);
// NOTE(review): the libusb_submit_transfer() results below are not checked,
// and user_data is first set to &completed* by the fill call and then
// overwritten with `this` — confirm both are intended.
939 // set up an asynchronous transfer of the timer register
940 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
941 static int completed = 0;
943 xfr = libusb_alloc_transfer(0);
944 libusb_fill_control_setup(cmdbuf,
945 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
946 /*index=*/44, /*length=*/4);
947 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
948 xfr->user_data = this;
949 libusb_submit_transfer(xfr);
951 // set up an asynchronous transfer of register 24
952 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
953 static int completed2 = 0;
955 xfr = libusb_alloc_transfer(0);
956 libusb_fill_control_setup(cmdbuf2,
957 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
958 /*index=*/24, /*length=*/4);
959 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
960 xfr->user_data = this;
961 libusb_submit_transfer(xfr);
964 // set up an asynchronous transfer of the register dump
965 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
966 static int completed3 = 0;
968 xfr = libusb_alloc_transfer(0);
969 libusb_fill_control_setup(cmdbuf3,
970 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
971 /*index=*/current_register, /*length=*/4);
972 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
973 xfr->user_data = this;
974 //libusb_submit_transfer(xfr);
// Stream used by dump_audio_block(). NOTE(review): the result is not checked
// for nullptr in the visible code.
976 audiofp = fopen("audio.raw", "wb");
978 // set up isochronous transfers for audio and video
979 for (int e = 3; e <= 4; ++e) {
980 //int num_transfers = (e == 3) ? 6 : 6;
981 int num_transfers = 10;
982 for (int i = 0; i < num_transfers; ++i) {
983 int num_iso_pack, size;
985 // Video seems to require isochronous packets scaled with the width;
986 // seemingly six lines is about right, rounded up to the required 1kB
988 size = WIDTH * 2 * 6;
989 // Note that for 10-bit input, you'll need to increase size accordingly.
990 //size = size * 4 / 3;
991 if (size % 1024 != 0) {
995 num_iso_pack = (2 << 16) / size; // 128 kB.
996 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// NOTE(review): this buffer is handed to the transfer and never freed in the
// visible code — confirm its intended lifetime.
1001 int num_bytes = num_iso_pack * size;
1002 uint8_t *buf = new uint8_t[num_bytes];
1004 xfr = libusb_alloc_transfer(num_iso_pack);
1006 fprintf(stderr, "oom\n");
1010 int ep = LIBUSB_ENDPOINT_IN | e;
1011 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
1012 num_iso_pack, cb_xfr, nullptr, 0);
1013 libusb_set_iso_packet_lengths(xfr, size);
// cb_xfr() recovers the BMUSBCapture instance from user_data.
1014 xfr->user_data = this;
1015 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer prepared by configure_card() (stored in
// iso_xfrs), which starts the flow of data into cb_xfr(). A submission
// failure is reported on stderr with the endpoint and transfer number.
// NOTE(review): devh is referenced near the end of this function but is a
// local of configure_card(); presumably those cleanup lines are compiled
// out — confirm.
1020 void BMUSBCapture::start_bm_capture()
1022 printf("starting capture\n");
1024 for (libusb_transfer *xfr : iso_xfrs) {
1025 printf("submitting transfer...\n");
1026 int rc = libusb_submit_transfer(xfr);
1029 //printf("num_bytes=%d\n", num_bytes);
1030 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1031 xfr->endpoint, i, libusb_error_name(rc));
1038 libusb_release_interface(devh, 0);
1042 libusb_exit(nullptr);
// Asks the dequeue thread to exit and blocks until it has finished.
1047 void BMUSBCapture::stop_dequeue_thread()
1049 dequeue_thread_should_quit = true;
// Wake the thread in case it is blocked waiting for frames; its wait
// predicate also checks dequeue_thread_should_quit.
1050 queues_not_empty.notify_all();
1051 dequeue_thread.join();
// Clears the global should_quit flag and starts the USB event thread, which
// runs usb_thread_func().
1054 void BMUSBCapture::start_bm_thread()
1056 should_quit = false;
1057 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1060 void BMUSBCapture::stop_bm_thread()