1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Number of lines per captured frame, including non-picture lines.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Byte offset skipped at the start of a video frame buffer (used by dump_frame
// and passed to frame_callback as the video data offset).
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Byte offset skipped at the start of an audio block (used by dump_audio_block
// and passed to frame_callback as the audio data offset).
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed 8 MiB (8 << 20 bytes) per video frame buffer; configure_card() passes
// this to the default video frame allocator.
46 #define FRAME_SIZE (8 << 20)
// Polled by the USB event loop in usb_thread_func(); reset to false by start_bm_thread().
51 atomic<bool> should_quit;
// Out-of-line (empty) destructor for the FrameAllocator base class.
53 FrameAllocator::~FrameAllocator() {}
55 // Audio is more important than video, and also much cheaper.
56 // By having many more audio frames available, hopefully if something
57 // starts to drop, we'll have CPU load go down (from not having to
58 // process as much video) before we have to drop audio.
59 #define NUM_QUEUED_VIDEO_FRAMES 16
60 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator that serves frames from a stack of heap buffers which are all
// allocated up front by the constructor; see the member definitions below.
62 class MallocFrameAllocator : public FrameAllocator {
// Preallocates <num_queued_frames> buffers of <frame_size> bytes each.
64 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
// Pops a buffer off the freelist, or logs an overrun if the freelist is empty.
65 Frame alloc_frame() override;
// Pushes the frame's buffer back onto the freelist.
66 void release_frame(Frame frame) override;
// Buffers currently available to alloc_frame(); the member functions take
// freelist_mutex before touching it.
72 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Builds the freelist: allocates <num_queued_frames> buffers of <frame_size>
// bytes each, so that no buffer needs to be allocated while capturing.
75 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
76 : frame_size(frame_size)
78 for (size_t i = 0; i < num_queued_frames; ++i) {
// Buffer contents are left uninitialized (plain new[]).
79 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one preallocated buffer from the freelist. When the freelist is
// empty, an overrun message is printed instead of taking a buffer.
83 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
// The freelist is shared between threads, hence the lock.
88 unique_lock<mutex> lock(freelist_mutex); // Meh.
89 if (freelist.empty()) {
// NOTE(review): "%ld" is the wrong conversion if the argument (not visible
// here) is the size_t frame_size; "%zu" would be the portable choice -- confirm.
90 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership of the buffer leaves the unique_ptr here; it is re-wrapped in
// release_frame().
93 vf.data = freelist.top().release();
95 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist so that alloc_frame() can hand it
// out again. Reports any overflow that was recorded on the frame.
100 void MallocFrameAllocator::release_frame(Frame frame)
102 if (frame.overflow > 0) {
103 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
105 unique_lock<mutex> lock(freelist_mutex);
// Re-wraps the raw pointer that alloc_frame() released from its unique_ptr.
106 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true iff <a> comes strictly before <b> when both are treated as
// positions on a 16-bit counter that wraps around, i.e., iff <b> is between
// 1 and 0x7fff steps ahead of <a> modulo 0x10000. Equal values compare false.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Truncating the difference to 16 bits gives the forward distance from
	// a to b modulo 0x10000, which covers both the a < b case (b - a) and
	// the wrapped a > b case (0x10000 + b - a) with a single expression.
	const uint16_t distance = static_cast<uint16_t>(b - a);
	return distance != 0 && distance < 0x8000;
}
// Appends <frame>, tagged with <timecode> (and <format>), to the queue <q>
// and signals queues_not_empty. If the queue is non-empty and its last
// entry's timecode is not strictly before <timecode> (16-bit wraparound
// comparison), the frame is logged as going backwards and given back to its
// owning allocator instead.
121 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
// Taken before <q> is inspected or modified.
123 unique_lock<mutex> lock(queue_lock);
124 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
125 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
126 q->back().timecode, timecode);
127 frame.owner->release_frame(frame);
133 qf.timecode = timecode;
135 q->push_back(move(qf));
// The dequeue thread waits for both queues to be non-empty, so this wakeup
// may not lead to any work being done.
136 queues_not_empty.notify_one(); // might be spurious
139 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
141 FILE *fp = fopen(filename, "wb");
142 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
143 printf("short write!\n");
148 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
150 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: matches pending video and audio frames by
// timecode and delivers them through frame_callback, until
// dequeue_thread_should_quit is set.
153 void BMUSBCapture::dequeue_thread_func()
155 if (has_dequeue_callbacks) {
156 dequeue_init_callback();
158 while (!dequeue_thread_should_quit) {
159 unique_lock<mutex> lock(queue_lock);
// Sleep until there is at least one frame in BOTH queues, or a quit was requested.
160 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
// NOTE(review): when the wait ends because of a quit request, a queue may
// still be empty and front() would then be invalid; confirm that a guard
// exists on a line not visible in this view.
162 uint16_t video_timecode = pending_video_frames.front().timecode;
163 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Case 1: the oldest video frame predates the oldest audio block; it is
// released back to the video allocator without being delivered.
164 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
165 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
167 video_frame_allocator->release_frame(pending_video_frames.front().frame);
168 pending_video_frames.pop_front();
// Case 2: the oldest audio block predates the oldest video frame; the audio
// is delivered together with a default-constructed video frame.
169 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
170 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
172 QueuedFrame audio_frame = pending_audio_frames.front();
173 pending_audio_frames.pop_front();
175 frame_callback(audio_timecode,
176 FrameAllocator::Frame(), 0, 0x0000,
177 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Case 3: neither timecode is before the other; deliver the pair together.
179 QueuedFrame video_frame = pending_video_frames.front();
180 QueuedFrame audio_frame = pending_audio_frames.front();
181 pending_audio_frames.pop_front();
182 pending_video_frames.pop_front();
// Dumps the pair to disk; presumably debug-only code that is compiled out
// (the guarding lines are not visible here) -- confirm.
187 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
188 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
189 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
192 frame_callback(video_timecode,
193 video_frame.frame, HEADER_SIZE, video_frame.format,
194 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
197 if (has_dequeue_callbacks) {
198 dequeue_cleanup_callback();
// Called when a video sync marker has been found; <start> points just past
// the marker. Queues the video frame collected so far (if it has any data)
// and allocates a fresh buffer for the frame that is starting.
202 void BMUSBCapture::start_new_frame(const uint8_t *start)
// The four bytes after the marker hold two little-endian 16-bit fields:
// timecode first, then format.
204 uint16_t format = (start[3] << 8) | start[2];
205 uint16_t timecode = (start[1] << 8) | start[0];
207 if (current_video_frame.len > 0) {
208 // If format is 0x0800 (no signal), add a fake (empty) audio
209 // frame to get it out of the queue.
210 // TODO: Figure out if there are other formats that come with
211 // no audio, and treat them the same.
212 if (format == 0x0800) {
213 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
214 if (fake_audio_frame.data == nullptr) {
215 // Oh well, it's just a no-signal frame anyway.
216 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
217 current_video_frame.owner->release_frame(current_video_frame);
218 current_video_frame = video_frame_allocator->alloc_frame();
221 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
224 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
226 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
228 // //start[7], start[6], start[5], start[4],
229 // read_current_frame, FRAME_SIZE);
// Buffer for the frame that begins here.
231 current_video_frame = video_frame_allocator->alloc_frame();
232 //if (current_video_frame.data == nullptr) {
233 // read_current_frame = -1;
235 // read_current_frame = 0;
// Called when an audio sync marker has been found; <start> points just past
// the marker. Queues the audio block collected so far (if it has any data)
// and allocates a fresh buffer for the block that is starting.
239 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Same header layout as for video: little-endian 16-bit timecode, then format.
241 uint16_t format = (start[3] << 8) | start[2];
242 uint16_t timecode = (start[1] << 8) | start[0];
243 if (current_audio_frame.len > 0) {
244 //dump_audio_block();
245 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
247 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
248 // format, timecode, read_current_audio_block);
249 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debugging aid: prints the received bytes of one isochronous packet as hex.
// <offset> is the position of the packet's data within xfr->buffer.
253 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
255 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
// Only actual_length bytes were received; the rest of the packet slot is not printed.
256 for (unsigned j = 0; j < pack->actual_length; j++) {
257 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
258 printf("%02x", xfr->buffer[j + offset]);
// Extra separator after every eighth byte (the separator itself is on a line
// not visible here).
261 else if ((j % 8) == 7)
// Splits <n> bytes from <src> into two streams: bytes at even offsets go to
// <dest1> and bytes at odd offsets go to <dest2>, so each destination
// receives n/2 bytes.
//
// n must be even; callers are expected to handle any odd leading or trailing
// byte themselves before calling.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	// Iterate over whole pairs, so that even if the assert is compiled out
	// and n is odd, nothing is read or written past the given ranges.
	const size_t num_pairs = n / 2;
	for (size_t pair = 0; pair < num_pairs; ++pair) {
		dest1[pair] = src[2 * pair];
		dest2[pair] = src[2 * pair + 1];
	}
}
// Appends the bytes in [start, end) to <current_frame>, either as one
// contiguous copy or, for interleaved frames, split between data and data2.
// <frame_type_name> is only used in the overflow message.
281 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard against frames with no buffer or an inconsistent length (the rest of
// the condition is on lines not visible here).
283 if (current_frame->data == nullptr ||
284 current_frame->len > current_frame->size ||
289 int bytes = end - start;
// Data that does not fit is not copied; the excess is recorded as overflow
// and the frame is marked as completely full.
290 if (current_frame->len + bytes > current_frame->size) {
291 current_frame->overflow = current_frame->len + bytes - current_frame->size;
292 current_frame->len = current_frame->size;
// Overflows above 1 MiB are reported right away and the counter is reset.
293 if (current_frame->overflow > 1048576) {
294 printf("%d bytes overflow after last %s frame\n",
295 int(current_frame->overflow), frame_type_name);
296 current_frame->overflow = 0;
300 if (current_frame->interleaved) {
// Each of the two destination buffers receives half of the bytes, hence len / 2.
301 uint8_t *data = current_frame->data + current_frame->len / 2;
302 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// An odd current length and an odd byte count get special handling (bodies
// not visible here) before the even-sized bulk copy below.
303 if (current_frame->len % 2 == 1) {
307 if (bytes % 2 == 1) {
310 ++current_frame->len;
313 memcpy_interleaved(data, data2, start, bytes);
314 current_frame->len += bytes;
// Non-interleaved frames: plain contiguous append.
316 memcpy(current_frame->data + current_frame->len, start, bytes);
317 current_frame->len += bytes;
// Debugging aid: prints <name> (left-justified in a 10-character field,
// followed by a colon) and then all 32 bytes of the AVX2 register <n> as hex.
// The extractions are written out one by one rather than in a loop;
// presumably because the index must be a compile-time constant -- confirm.
325 void avx2_dump(const char *name, __m256i n)
327 printf("%-10s:", name);
// Bytes 0-7.
328 printf(" %02x", _mm256_extract_epi8(n, 0));
329 printf(" %02x", _mm256_extract_epi8(n, 1));
330 printf(" %02x", _mm256_extract_epi8(n, 2));
331 printf(" %02x", _mm256_extract_epi8(n, 3));
332 printf(" %02x", _mm256_extract_epi8(n, 4));
333 printf(" %02x", _mm256_extract_epi8(n, 5));
334 printf(" %02x", _mm256_extract_epi8(n, 6));
335 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
337 printf(" %02x", _mm256_extract_epi8(n, 8));
338 printf(" %02x", _mm256_extract_epi8(n, 9));
339 printf(" %02x", _mm256_extract_epi8(n, 10));
340 printf(" %02x", _mm256_extract_epi8(n, 11));
341 printf(" %02x", _mm256_extract_epi8(n, 12));
342 printf(" %02x", _mm256_extract_epi8(n, 13));
343 printf(" %02x", _mm256_extract_epi8(n, 14));
344 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
346 printf(" %02x", _mm256_extract_epi8(n, 16));
347 printf(" %02x", _mm256_extract_epi8(n, 17));
348 printf(" %02x", _mm256_extract_epi8(n, 18));
349 printf(" %02x", _mm256_extract_epi8(n, 19));
350 printf(" %02x", _mm256_extract_epi8(n, 20));
351 printf(" %02x", _mm256_extract_epi8(n, 21));
352 printf(" %02x", _mm256_extract_epi8(n, 22));
353 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
355 printf(" %02x", _mm256_extract_epi8(n, 24));
356 printf(" %02x", _mm256_extract_epi8(n, 25));
357 printf(" %02x", _mm256_extract_epi8(n, 26));
358 printf(" %02x", _mm256_extract_epi8(n, 27));
359 printf(" %02x", _mm256_extract_epi8(n, 28));
360 printf(" %02x", _mm256_extract_epi8(n, 29));
361 printf(" %02x", _mm256_extract_epi8(n, 30));
362 printf(" %02x", _mm256_extract_epi8(n, 31));
367 // Does a memcpy and memchr in one to reduce processing time.
368 // Note that the benefit is somewhat limited if your L3 cache is small,
369 // as you'll (unfortunately) spend most of the time loading the data
372 // Complicated cases are left to the slow path; it basically stops copying
373 // up until the first instance of "sync_char" (usually a bit before, actually).
374 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
375 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Copies bytes from [start, limit) into <current_frame> in SIMD-sized chunks
// while checking each chunk for <sync_char>. The value returned at the end is
// the input cursor, i.e., where the caller's slow path should continue.
376 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same kind of sanity check on the frame as in add_to_frame().
378 if (current_frame->data == nullptr ||
379 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes get special handling (body not visible here).
383 size_t orig_bytes = limit - start;
384 if (orig_bytes < 128) {
389 // Don't read more bytes than we can write.
390 limit = min(limit, start + (current_frame->size - current_frame->len));
392 // Align end to 32 bytes.
393 limit = (const uint8_t *)(intptr_t(limit) & ~31);
395 if (start >= limit) {
399 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
400 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
401 if (aligned_start != start) {
// The unaligned head goes through the regular add_to_frame(); if it already
// contains sync_char, only the bytes before it are added.
402 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
403 if (sync_start == nullptr) {
404 add_to_frame(current_frame, "", start, aligned_start);
406 add_to_frame(current_frame, "", start, sync_start);
411 // Make the length a multiple of 64.
412 if (current_frame->interleaved) {
413 if (((limit - aligned_start) % 64) != 0) {
416 assert(((limit - aligned_start) % 64) == 0);
// Two variants follow that declare the same local names (needle, in): a
// 256-bit one and a 128-bit one. Presumably a preprocessor conditional that
// is not visible here selects one of them -- confirm.
420 const __m256i needle = _mm256_set1_epi8(sync_char);
422 const __restrict __m256i *in = (const __m256i *)aligned_start;
423 if (current_frame->interleaved) {
424 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
425 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
426 if (current_frame->len % 2 == 1) {
// Byte shuffle that, within each 128-bit lane, gathers the even-indexed
// bytes in the low half and the odd-indexed bytes in the high half.
430 __m256i shuffle_cw = _mm256_set_epi8(
431 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
432 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
433 while (in < (const __m256i *)limit) {
434 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
435 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
436 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Byte-wise comparison against sync_char over both 32-byte inputs.
438 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
439 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
440 __m256i found = _mm256_or_si256(found1, found2);
442 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
443 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
445 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
446 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// Combine the low 128-bit halves of data1/data2 into lo and the high halves into hi.
448 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
449 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
451 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
452 _mm256_storeu_si256(out2, hi);
// Non-zero <found> means at least one byte in this chunk equals sync_char.
454 if (!_mm256_testz_si256(found, found)) {
// The frame grows by the number of input bytes consumed.
462 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved 256-bit variant: straight copy, 32 bytes at a time.
464 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
465 while (in < (const __m256i *)limit) {
466 __m256i data = _mm256_load_si256(in);
467 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
468 __m256i found = _mm256_cmpeq_epi8(data, needle);
469 if (!_mm256_testz_si256(found, found)) {
476 current_frame->len = (uint8_t *)out - current_frame->data;
// 128-bit variant of the same algorithm.
479 const __m128i needle = _mm_set1_epi8(sync_char);
481 const __m128i *in = (const __m128i *)aligned_start;
482 if (current_frame->interleaved) {
483 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
484 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
485 if (current_frame->len % 2 == 1) {
// Deinterleave by masking/shifting each 16-bit pair into its low and high
// byte and packing the results back down to bytes.
489 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
490 while (in < (const __m128i *)limit) {
491 __m128i data1 = _mm_load_si128(in);
492 __m128i data2 = _mm_load_si128(in + 1);
493 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
494 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
495 __m128i data1_hi = _mm_srli_epi16(data1, 8);
496 __m128i data2_hi = _mm_srli_epi16(data2, 8);
497 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
498 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
499 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
500 _mm_storeu_si128(out2, hi);
501 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
502 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
503 if (!_mm_testz_si128(found1, found1) ||
504 !_mm_testz_si128(found2, found2)) {
512 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved 128-bit variant: straight copy, 16 bytes at a time.
514 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
515 while (in < (const __m128i *)limit) {
516 __m128i data = _mm_load_si128(in);
517 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
518 __m128i found = _mm_cmpeq_epi8(data, needle);
519 if (!_mm_testz_si128(found, found)) {
526 current_frame->len = (uint8_t *)out - current_frame->data;
530 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Position of the input cursor after the fast path.
532 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer and appends their
// payload to <current_frame>. Whenever <sync_pattern> is found, the data
// before it is added to the frame and <start_callback> is called with a
// pointer to the first byte after the pattern.
// <frame_type_name> is only used for log messages.
536 void decode_packs(const libusb_transfer *xfr,
537 const char *sync_pattern,
539 FrameAllocator::Frame *current_frame,
540 const char *frame_type_name,
541 function<void(const uint8_t *start)> start_callback)
544 for (int i = 0; i < xfr->num_iso_packets; i++) {
545 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
547 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
548 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only the first actual_length bytes of each packet slot contain data.
553 const uint8_t *start = xfr->buffer + offset;
554 const uint8_t *limit = start + pack->actual_length;
555 while (start < limit) { // Usually runs only one iteration.
// Fast path first, keyed on the first byte of the sync pattern only; it
// returns the position where the slow path below has to take over.
557 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
558 if (start == limit) break;
559 assert(start < limit);
// Slow path: exact search for the complete sync pattern.
562 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
563 if (start_next_frame == nullptr) {
564 // add the rest of the buffer
565 add_to_frame(current_frame, frame_type_name, start, limit);
568 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
569 start = start_next_frame + sync_length; // skip sync
570 start_callback(start);
574 dump_pack(xfr, offset, pack);
// Packet slots are spaced by their allocated length, not by actual_length.
576 offset += pack->length;
// libusb completion callback for all transfers set up by this class.
// Isochronous transfers are decoded as audio or video depending on the
// endpoint; control transfers are treated as register reads. The transfer is
// submitted again at the end.
580 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
582 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
583 fprintf(stderr, "transfer status %d\n", xfr->status);
584 libusb_free_transfer(xfr);
// user_data carries the BMUSBCapture instance (set in configure_card()).
588 assert(xfr->user_data != nullptr);
589 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
591 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio, with a 20-byte text marker as sync
// pattern; the other branch decodes video, with 00 00 ff ff as sync pattern.
592 if (xfr->endpoint == 0x84) {
593 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
595 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
598 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
599 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
600 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): <setup> is only declared in the commented-out line above, so
// the printouts below are presumably compiled out by a conditional that is
// not visible here -- confirm.
602 if (setup->wIndex == 44) {
603 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
605 printf("read register %2d: 0x%02x%02x%02x%02x\n",
606 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// around after NUM_BMUSB_REGISTERS.
609 memcpy(usb->register_file + usb->current_register, buf, 4);
610 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
611 if (usb->current_register == 0) {
612 // read through all of them
613 printf("register dump:");
614 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
615 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Rewrite the setup packet in place so that the resubmitted transfer reads
// the next register.
619 libusb_fill_control_setup(xfr->buffer,
620 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
621 /*index=*/usb->current_register, /*length=*/4);
// Raw hex dump of the transfer buffer.
626 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
627 for (i = 0; i < xfr->actual_length; i++) {
628 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so that data keeps flowing.
638 int rc = libusb_submit_transfer(xfr);
640 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
645 void BMUSBCapture::usb_thread_func()
648 memset(¶m, 0, sizeof(param));
649 param.sched_priority = 1;
650 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
651 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
653 while (!should_quit) {
654 int rc = libusb_handle_events(nullptr);
655 if (rc != LIBUSB_SUCCESS)
// One supported capture card found during enumeration in open_card().
660 struct USBCardDevice {
// libusb device for the card; open_card() unrefs it after opening the selected card.
663 libusb_device *device;
// Enumerates all USB devices, keeps those with a supported vendor/product ID
// (1edb:bd3b or 1edb:bd4f), sorts them into a stable order, lists them on
// stderr and opens the one at position <card_index> in that order.
666 libusb_device_handle *open_card(int card_index)
668 libusb_device **devices;
669 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
// NOTE(review): libusb_get_device_list() signals failure with any negative
// LIBUSB_ERROR code, not only -1, so "< 0" looks like the safer test -- confirm.
670 if (num_devices == -1) {
671 fprintf(stderr, "Error finding USB devices\n");
674 vector<USBCardDevice> found_cards;
675 for (ssize_t i = 0; i < num_devices; ++i) {
676 libusb_device_descriptor desc;
677 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
678 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
682 uint8_t bus = libusb_get_bus_number(devices[i]);
683 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are unreffed here;
// the list itself is freed further down without unreffing its entries.
685 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
686 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
687 libusb_unref_device(devices[i]);
691 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free only the list and keep the device references,
// since found_cards still uses the matching devices.
693 libusb_free_device_list(devices, 0);
695 // Sort the devices to get a consistent ordering.
696 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
697 if (a.product != b.product)
698 return a.product < b.product;
700 return a.bus < b.bus;
701 return a.port < b.port;
704 for (size_t i = 0; i < found_cards.size(); ++i) {
// NOTE(review): the value printed after "Device" is the port number.
705 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
706 if (found_cards[i].product == 0xbd3b) {
707 fprintf(stderr, "Intensity Shuttle\n");
708 } else if (found_cards[i].product == 0xbd4f) {
709 fprintf(stderr, "UltraStudio SDI\n");
// The cast to size_t makes a negative card_index fail this check as well.
715 if (size_t(card_index) >= found_cards.size()) {
716 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
720 libusb_device_handle *devh;
721 int rc = libusb_open(found_cards[card_index].device, &devh);
723 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references that were kept when the device list was freed.
727 for (size_t i = 0; i < found_cards.size(); ++i) {
728 libusb_unref_device(found_cards[i].device);
// One-time setup of the capture card: installs default frame allocators if
// none were set, starts the dequeue thread, initializes libusb, opens and
// configures the card, writes the initial control registers, and prepares
// (but does not submit) the isochronous transfers for video and audio.
734 void BMUSBCapture::configure_card()
// Default allocators; 65536 bytes per audio buffer, FRAME_SIZE per video buffer.
736 if (video_frame_allocator == nullptr) {
737 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
739 if (audio_frame_allocator == nullptr) {
740 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// The dequeue thread is started before any USB setup has been done.
742 dequeue_thread_should_quit = false;
743 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
746 struct libusb_transfer *xfr;
// nullptr selects libusb's default context.
748 rc = libusb_init(nullptr);
750 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
754 libusb_device_handle *devh = open_card(card_index);
756 fprintf(stderr, "Error finding USB device\n");
// Informational only: print every interface, alternate setting and endpoint
// of configuration index 0.
760 libusb_config_descriptor *config;
761 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
763 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
766 printf("%d interface\n", config->bNumInterfaces);
767 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
768 printf(" interface %d\n", interface_number);
769 const libusb_interface *interface = &config->interface[interface_number];
770 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
771 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
772 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
773 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
774 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
775 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
780 rc = libusb_set_configuration(devh, /*configuration=*/1);
782 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
786 rc = libusb_claim_interface(devh, 0);
788 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
792 // Alternate setting 1 is output, alternate setting 2 is input.
793 // Card is reset when switching alternates, so the driver uses
794 // this “double switch” when it wants to reset.
796 // There's also alternate settings 3 and 4, which seem to be
797 // like 1 and 2 except they advertise less bandwidth needed.
798 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
800 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
803 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
805 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
809 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
811 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
817 rc = libusb_claim_interface(devh, 3);
819 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
825 // 44 is some kind of timer register (first 16 bits count upwards)
826 // 24 is some sort of watchdog?
827 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
828 // (or will go to 0x73c60010?), also seen 0x73c60100
829 // 12 also changes all the time, unclear why
830 // 16 seems to be autodetected mode somehow
831 // -- this is e00115e0 after reset?
832 // ed0115e0 after mode change [to output?]
833 // 2d0015e0 after more mode change [to input]
834 // ed0115e0 after more mode change
835 // 2d0015e0 after more mode change
837 // 390115e0 seems to indicate we have signal
838 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
840 // 200015e0 on startup
841 // changes to 250115e0 when we sync to the signal
843 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
845 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
847 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
848 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
850 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
851 // perhaps some of them are related to analog output?
853 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
854 // but the driver sets it to 0x8036802a at some point.
856 // all of this is on request 214/215. other requests (192, 219,
857 // 222, 223, 224) are used for firmware upgrade. Probably best to
858 // stay out of it unless you know what you're doing.
862 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
865 // 0x01 - stable signal
867 // 0x08 - unknown (audio??)
// Initial control requests, executed in order by the loop below. Each entry
// is { direction, request, register index, 32-bit data }; in this table, 214
// is used with IN (read) and 215 with OUT (write).
877 static const ctrl ctrls[] = {
878 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
879 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
881 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
883 // clearing the 0x08000000 bit seems to change the capture format (other source?)
884 // 0x10000000 = analog audio instead of embedded audio, it seems
885 // 0x3a000000 = component video? (analog audio)
886 // 0x3c000000 = composite video? (analog audio)
887 // 0x3e000000 = s-video? (analog audio)
888 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
889 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
890 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
891 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
892 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
895 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is transferred in network (big-endian) byte order.
896 uint32_t flipped = htonl(ctrls[req].data);
897 static uint8_t value[4];
898 memcpy(value, &flipped, sizeof(flipped));
899 int size = sizeof(value);
900 //if (ctrls[req].request == 215) size = 0;
// Synchronous transfer; timeout 0 means wait without limit.
901 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
902 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
904 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// On success rc is the number of bytes transferred; print that many bytes.
908 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
909 for (int i = 0; i < rc; ++i) {
910 printf("%02x", value[i]);
// Synchronous read of one register selected by my_index; presumably
// debugging code that is normally compiled out (guard not visible) -- confirm.
918 static int my_index = 0;
919 static uint8_t value[4];
920 int size = sizeof(value);
921 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
922 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
924 fprintf(stderr, "Error on control\n");
927 printf("rc=%d index=%d: 0x", rc, my_index);
928 for (int i = 0; i < rc; ++i) {
929 printf("%02x", value[i]);
// The three asynchronous register reads below complete in cb_xfr(). The
// user_data passed to libusb_fill_control_transfer (&completed etc.) is
// overwritten with `this` right afterwards, and the return value of
// libusb_submit_transfer() is not checked.
936 // set up an asynchronous transfer of the timer register
937 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
938 static int completed = 0;
940 xfr = libusb_alloc_transfer(0);
941 libusb_fill_control_setup(cmdbuf,
942 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
943 /*index=*/44, /*length=*/4);
944 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
945 xfr->user_data = this;
946 libusb_submit_transfer(xfr);
948 // set up an asynchronous transfer of register 24
949 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
950 static int completed2 = 0;
952 xfr = libusb_alloc_transfer(0);
953 libusb_fill_control_setup(cmdbuf2,
954 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
955 /*index=*/24, /*length=*/4);
956 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
957 xfr->user_data = this;
958 libusb_submit_transfer(xfr);
961 // set up an asynchronous transfer of the register dump
962 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
963 static int completed3 = 0;
965 xfr = libusb_alloc_transfer(0);
966 libusb_fill_control_setup(cmdbuf3,
967 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
968 /*index=*/current_register, /*length=*/4);
969 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
970 xfr->user_data = this;
971 //libusb_submit_transfer(xfr);
// Output file used by dump_audio_block().
// NOTE(review): no check of the fopen() result is visible here -- confirm.
973 audiofp = fopen("audio.raw", "wb");
975 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4 (IN direction); cb_xfr() treats 0x84 as audio.
976 for (int e = 3; e <= 4; ++e) {
977 //int num_transfers = (e == 3) ? 6 : 6;
978 int num_transfers = 10;
979 for (int i = 0; i < num_transfers; ++i) {
980 int num_iso_pack, size;
982 // Video seems to require isochronous packets scaled with the width;
983 // seemingly six lines is about right, rounded up to the required 1kB
985 size = WIDTH * 2 * 6;
986 // Note that for 10-bit input, you'll need to increase size accordingly.
987 //size = size * 4 / 3;
988 if (size % 1024 != 0) {
// 2 << 16 = 131072 bytes in total per transfer.
992 num_iso_pack = (2 << 16) / size; // 128 kB.
993 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer per transfer; no matching delete[] is visible in
// this function.
998 int num_bytes = num_iso_pack * size;
999 uint8_t *buf = new uint8_t[num_bytes];
1001 xfr = libusb_alloc_transfer(num_iso_pack);
1003 fprintf(stderr, "oom\n");
1007 int ep = LIBUSB_ENDPOINT_IN | e;
1008 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
1009 num_iso_pack, cb_xfr, nullptr, 0);
1010 libusb_set_iso_packet_lengths(xfr, size);
1011 xfr->user_data = this;
// Kept for later; the transfers are submitted by start_bm_capture().
1012 iso_xfrs.push_back(xfr);
// Starts streaming by submitting every isochronous transfer that
// configure_card() stored in iso_xfrs.
1017 void BMUSBCapture::start_bm_capture()
1019 printf("starting capture\n");
1021 for (libusb_transfer *xfr : iso_xfrs) {
1022 printf("submitting transfer...\n");
1023 int rc = libusb_submit_transfer(xfr);
1026 //printf("num_bytes=%d\n", num_bytes);
// <i> is presumably a running transfer counter kept on lines not visible here.
1027 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1028 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): releasing the interface and shutting down libusb right after
// starting the capture would stop it again, so these calls are presumably
// compiled out by a guard not visible here -- confirm.
1035 libusb_release_interface(devh, 0);
1039 libusb_exit(nullptr);
// Asks the dequeue thread to exit and blocks until it has done so.
1044 void BMUSBCapture::stop_dequeue_thread()
1046 dequeue_thread_should_quit = true;
// Wake the thread in case it is waiting for frames, so that it sees the flag.
1047 queues_not_empty.notify_all();
1048 dequeue_thread.join();
// Clears the quit flag and starts the thread that runs the libusb event loop.
1051 void BMUSBCapture::start_bm_thread()
1053 should_quit = false;
// No object pointer is passed along with the member function pointer, so
// usb_thread_func is presumably a static member -- confirm.
1054 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1057 void BMUSBCapture::stop_bm_thread()