1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Frame layout constants used by the capture code below. The commented-out
// alternatives are earlier/experimental values kept for reference.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Number of bytes at the start of a video frame buffer that precede the
// payload; used as the video offset in dump_frame() and in the frame callback.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Same for audio blocks; used in dump_audio_block() and in the frame callback.
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed 8 MiB video buffer size, used instead of the computed sizes above.
46 #define FRAME_SIZE (8 << 20)
// Loop condition of the USB thread (usb_thread_func); set to false in start_bm_thread().
51 atomic<bool> should_quit;
// Out-of-line definition of the (empty) FrameAllocator destructor.
53 FrameAllocator::~FrameAllocator() {}
55 // Audio is more important than video, and also much cheaper.
56 // By having many more audio frames available, hopefully if something
57 // starts to drop, we'll have CPU load go down (from not having to
58 // process as much video) before we have to drop audio.
// These are the pool sizes passed to the default MallocFrameAllocator
// instances created in configure_card().
59 #define NUM_QUEUED_VIDEO_FRAMES 16
60 #define NUM_QUEUED_AUDIO_FRAMES 64
// FrameAllocator implementation backed by a fixed pool of buffers that is
// filled once in the constructor and recycled through a freelist.
62 class MallocFrameAllocator : public FrameAllocator {
// Pre-allocates num_queued_frames buffers of frame_size bytes each.
64 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
// Takes a buffer from the freelist (reports an overrun if none is left).
65 Frame alloc_frame() override;
// Puts the frame's buffer back on the freelist.
66 void release_frame(Frame frame) override;
// Buffers currently available to alloc_frame().
72 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers the per-buffer size and fills the freelist with
// num_queued_frames freshly allocated buffers of frame_size bytes.
75 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
76 : frame_size(frame_size)
78 for (size_t i = 0; i < num_queued_frames; ++i) {
// Each buffer is owned by a unique_ptr while it sits on the freelist.
79 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist. If the freelist is empty, a
// "Frame overrun" message is printed.
83 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
// The freelist is also touched by release_frame(), so it is accessed under freelist_mutex.
88 unique_lock<mutex> lock(freelist_mutex); // Meh.
89 if (freelist.empty()) {
90 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership moves from the unique_ptr on top of the stack to the raw
// pointer stored in the frame...
93 vf.data = freelist.top().release();
// ...and the now-empty unique_ptr is removed from the stack.
95 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist, after reporting any overflow
// byte count recorded on the frame.
100 void MallocFrameAllocator::release_frame(Frame frame)
102 if (frame.overflow > 0) {
103 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
// Re-wrap the raw pointer in a unique_ptr so the freelist owns the buffer again.
105 unique_lock<mutex> lock(freelist_mutex);
106 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around. Both visible return
// statements yield true when b is less than 0x8000 steps ahead of a.
// NOTE(review): the conditions selecting between the two branches are not
// visible here; presumably the first is used when a < b — confirm.
109 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
// Distance computed directly.
114 return (b - a < 0x8000);
// Distance computed as if b had wrapped past 0xffff: add 0x10000 first.
116 int wrap_b = 0x10000 + int(b);
117 return (wrap_b - a < 0x8000);
// Appends a frame to the queue *q while holding queue_lock. If the queue's
// last entry does not have a timecode strictly before the new one (per
// uint16_less_than_with_wraparound), the frame is reported as going
// backwards and released to its owning allocator.
121 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
123 unique_lock<mutex> lock(queue_lock);
124 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
125 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
126 q->back().timecode, timecode);
127 frame.owner->release_frame(frame);
133 qf.timecode = timecode;
135 q->push_back(move(qf));
// Wake the dequeue thread; its wait predicate re-checks that both queues
// are non-empty, so a wakeup with only one queue filled is harmless.
136 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes the frame payload (everything after the first
// HEADER_SIZE bytes) to the file <filename>, opened in binary write mode.
// NOTE(review): fp is handed to fwrite() directly after fopen() without a
// null check, and frame_len < HEADER_SIZE would make the size wrap around.
139 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
141 FILE *fp = fopen(filename, "wb");
// The payload is written as a single item, so anything but 1 is a short write.
142 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
143 printf("short write!\n");
// Debug helper: writes the audio payload (everything after the first
// AUDIO_HEADER_SIZE bytes) to the audiofp stream. The fwrite() result is
// not checked.
148 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
150 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Waits until both pending queues have an
// entry, compares the two front timecodes and then either drops the video
// frame, delivers the audio block on its own, or takes both entries and
// delivers them through frame_callback. Runs until
// dequeue_thread_should_quit is set. The optional init/cleanup callbacks
// are invoked at the start and end when has_dequeue_callbacks is set.
153 void BMUSBCapture::dequeue_thread_func()
155 if (has_dequeue_callbacks) {
156 dequeue_init_callback();
158 while (!dequeue_thread_should_quit) {
159 unique_lock<mutex> lock(queue_lock);
// Sleep until there is both a video and an audio entry, or we are asked to quit.
160 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
162 if (dequeue_thread_should_quit) break;
164 uint16_t video_timecode = pending_video_frames.front().timecode;
165 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Video timecode is before the audio timecode: the video frame is popped
// and its buffer is given back to the video allocator.
166 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
167 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
169 QueuedFrame video_frame = pending_video_frames.front();
170 pending_video_frames.pop_front();
172 video_frame_allocator->release_frame(video_frame.frame);
// Audio timecode is before the video timecode: the audio block is delivered
// together with a default-constructed video frame.
173 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
174 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
176 QueuedFrame audio_frame = pending_audio_frames.front();
177 pending_audio_frames.pop_front();
179 frame_callback(audio_timecode,
180 FrameAllocator::Frame(), 0, 0x0000,
181 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Both front entries are taken off their queues together here (presumably
// the case where the two timecodes match).
183 QueuedFrame video_frame = pending_video_frames.front();
184 QueuedFrame audio_frame = pending_audio_frames.front();
185 pending_audio_frames.pop_front();
186 pending_video_frames.pop_front();
// Debug dump of the pair: video to "<format><timecode>.uyvy", audio to audiofp.
191 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
192 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
193 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The callback receives each buffer plus the offset at which its payload starts.
196 frame_callback(video_timecode,
197 video_frame.frame, HEADER_SIZE, video_frame.format,
198 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
201 if (has_dequeue_callbacks) {
202 dequeue_cleanup_callback();
// Invoked by decode_packs() with a pointer to the bytes right after a video
// sync marker. Reads a 16-bit little-endian timecode (bytes 0-1) and format
// (bytes 2-3), queues the video data collected so far under those values,
// and allocates a fresh buffer for the next frame.
206 void BMUSBCapture::start_new_frame(const uint8_t *start)
208 uint16_t format = (start[3] << 8) | start[2];
209 uint16_t timecode = (start[1] << 8) | start[0];
// Only queue something if bytes were actually collected in the current frame.
211 if (current_video_frame.len > 0) {
212 // If format is 0x0800 (no signal), add a fake (empty) audio
213 // frame to get it out of the queue.
214 // TODO: Figure out if there are other formats that come with
215 // no audio, and treat them the same.
216 if (format == 0x0800) {
217 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
218 if (fake_audio_frame.data == nullptr) {
219 // Oh well, it's just a no-signal frame anyway.
220 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
221 current_video_frame.owner->release_frame(current_video_frame);
222 current_video_frame = video_frame_allocator->alloc_frame();
// The fake audio entry carries the same format/timecode as the video frame.
225 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
228 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
230 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
232 // //start[7], start[6], start[5], start[4],
233 // read_current_frame, FRAME_SIZE);
// Start collecting the next frame into a new buffer.
235 current_video_frame = video_frame_allocator->alloc_frame();
236 //if (current_video_frame.data == nullptr) {
237 // read_current_frame = -1;
239 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): invoked by decode_packs() with a
// pointer to the bytes right after an audio sync marker. Reads the 16-bit
// little-endian timecode (bytes 0-1) and format (bytes 2-3), queues the
// audio collected so far, and allocates a fresh audio buffer.
243 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
245 uint16_t format = (start[3] << 8) | start[2];
246 uint16_t timecode = (start[1] << 8) | start[0];
// Only queue the current block if it received any data.
247 if (current_audio_frame.len > 0) {
248 //dump_audio_block();
249 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
251 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
252 // format, timecode, read_current_audio_block);
253 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints, as hex, the bytes actually received for one
// isochronous packet. The packet's data starts at <offset> within
// xfr->buffer and is pack->actual_length bytes long.
257 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
259 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
260 for (unsigned j = 0; j < pack->actual_length; j++) {
261 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
262 printf("%02x", xfr->buffer[j + offset]);
// Separator handling that depends on the position within the row.
265 else if ((j % 8) == 7)
// Splits n source bytes between two destination buffers, consuming the
// source two bytes per loop iteration.
// NOTE(review): presumably even-indexed bytes go to dest1 and odd-indexed
// bytes to dest2, and n is expected to be even — confirm against the loop body.
273 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
276 uint8_t *dptr1 = dest1;
277 uint8_t *dptr2 = dest2;
279 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to *current_frame.
//   current_frame    frame being filled; len/overflow are updated in place
//   frame_type_name  label used in the overflow message
// If the new bytes do not fit in the buffer, the excess is recorded in
// current_frame->overflow and len is set to size. Interleaved frames get
// their bytes split between data and data2.
285 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard on an unusable frame: no buffer, or a length already past the buffer size.
287 if (current_frame->data == nullptr ||
288 current_frame->len > current_frame->size ||
293 int bytes = end - start;
294 if (current_frame->len + bytes > current_frame->size) {
// Record how many bytes did not fit and mark the buffer as full.
295 current_frame->overflow = current_frame->len + bytes - current_frame->size;
296 current_frame->len = current_frame->size;
// Overflows above 1 MiB are reported right away and the counter is reset.
297 if (current_frame->overflow > 1048576) {
298 printf("%d bytes overflow after last %s frame\n",
299 int(current_frame->overflow), frame_type_name);
300 current_frame->overflow = 0;
304 if (current_frame->interleaved) {
// len counts bytes across both halves, so each half is written at len / 2.
305 uint8_t *data = current_frame->data + current_frame->len / 2;
306 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Special handling for an odd current length...
307 if (current_frame->len % 2 == 1) {
// ...and for an odd number of incoming bytes.
311 if (bytes % 2 == 1) {
314 ++current_frame->len;
317 memcpy_interleaved(data, data2, start, bytes);
318 current_frame->len += bytes;
// Non-interleaved frames: plain append at the current length.
320 memcpy(current_frame->data + current_frame->len, start, bytes);
321 current_frame->len += bytes;
// Debug helper: prints <name> (left-aligned, padded to 10 characters)
// followed by the 32 bytes of the AVX2 register n in hex, byte 0 first.
// The extractions are written out one by one with literal indices rather
// than in a loop.
329 void avx2_dump(const char *name, __m256i n)
331 printf("%-10s:", name);
// Bytes 0-7.
332 printf(" %02x", _mm256_extract_epi8(n, 0));
333 printf(" %02x", _mm256_extract_epi8(n, 1));
334 printf(" %02x", _mm256_extract_epi8(n, 2));
335 printf(" %02x", _mm256_extract_epi8(n, 3));
336 printf(" %02x", _mm256_extract_epi8(n, 4));
337 printf(" %02x", _mm256_extract_epi8(n, 5));
338 printf(" %02x", _mm256_extract_epi8(n, 6));
339 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
341 printf(" %02x", _mm256_extract_epi8(n, 8));
342 printf(" %02x", _mm256_extract_epi8(n, 9));
343 printf(" %02x", _mm256_extract_epi8(n, 10));
344 printf(" %02x", _mm256_extract_epi8(n, 11));
345 printf(" %02x", _mm256_extract_epi8(n, 12));
346 printf(" %02x", _mm256_extract_epi8(n, 13));
347 printf(" %02x", _mm256_extract_epi8(n, 14));
348 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
350 printf(" %02x", _mm256_extract_epi8(n, 16));
351 printf(" %02x", _mm256_extract_epi8(n, 17));
352 printf(" %02x", _mm256_extract_epi8(n, 18));
353 printf(" %02x", _mm256_extract_epi8(n, 19));
354 printf(" %02x", _mm256_extract_epi8(n, 20));
355 printf(" %02x", _mm256_extract_epi8(n, 21));
356 printf(" %02x", _mm256_extract_epi8(n, 22));
357 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
359 printf(" %02x", _mm256_extract_epi8(n, 24));
360 printf(" %02x", _mm256_extract_epi8(n, 25));
361 printf(" %02x", _mm256_extract_epi8(n, 26));
362 printf(" %02x", _mm256_extract_epi8(n, 27));
363 printf(" %02x", _mm256_extract_epi8(n, 28));
364 printf(" %02x", _mm256_extract_epi8(n, 29));
365 printf(" %02x", _mm256_extract_epi8(n, 30));
366 printf(" %02x", _mm256_extract_epi8(n, 31));
371 // Does a memcpy and memchr in one to reduce processing time.
372 // Note that the benefit is somewhat limited if your L3 cache is small,
373 // as you'll (unfortunately) spend most of the time loading the data
376 // Complicated cases are left to the slow path; it basically stops copying
377 // up until the first instance of "sync_char" (usually a bit before, actually).
378 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
379 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame  frame being filled (len is advanced by the copied amount)
//   start, limit   input range [start, limit)
//   sync_char      byte whose presence ends the fast copy
// Returns a pointer into the input range; decode_packs() resumes its slow
// path from the returned position.
380 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Guard on an unusable frame (no buffer, or length already past the size).
382 if (current_frame->data == nullptr ||
383 current_frame->len > current_frame->size ||
// Guard on short inputs (fewer than 128 bytes).
387 size_t orig_bytes = limit - start;
388 if (orig_bytes < 128) {
393 // Don't read more bytes than we can write.
394 limit = min(limit, start + (current_frame->size - current_frame->len));
396 // Align end to 32 bytes.
397 limit = (const uint8_t *)(intptr_t(limit) & ~31);
399 if (start >= limit) {
403 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
404 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
405 if (aligned_start != start) {
// The unaligned head goes through the regular add_to_frame(); if it already
// contains sync_char, only the bytes before it are added.
406 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
407 if (sync_start == nullptr) {
408 add_to_frame(current_frame, "", start, aligned_start);
410 add_to_frame(current_frame, "", start, sync_start);
415 // Make the length a multiple of 64.
416 if (current_frame->interleaved) {
417 if (((limit - aligned_start) % 64) != 0) {
420 assert(((limit - aligned_start) % 64) == 0);
// 256-bit (AVX2) variant of the copy loops.
// NOTE(review): "needle" and "in" are declared again further down with
// 128-bit types, so the two variants are presumably selected by a
// preprocessor conditional — confirm.
424 const __m256i needle = _mm256_set1_epi8(sync_char);
426 const __restrict __m256i *in = (const __m256i *)aligned_start;
427 if (current_frame->interleaved) {
// Even-indexed input bytes go to data, odd-indexed ones to data2; with an
// odd current length, out1 starts one byte further along than out2.
428 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
429 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
430 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-indexed bytes to the low half, odd-indexed
// bytes to the high half.
434 __m256i shuffle_cw = _mm256_set_epi8(
435 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
436 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
437 while (in < (const __m256i *)limit) {
438 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
439 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
440 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Byte-wise compare against sync_char over both 32-byte halves.
442 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
443 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
444 __m256i found = _mm256_or_si256(found1, found2);
446 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
447 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
449 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
450 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo = low 128 bits of data1 and data2 (even-indexed bytes),
// hi = high 128 bits of data1 and data2 (odd-indexed bytes).
452 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
453 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
455 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
456 _mm256_storeu_si256(out2, hi);
// Any matching byte in this 64-byte chunk ends the fast path.
458 if (!_mm256_testz_si256(found, found)) {
// Account for the input bytes consumed by the loop above.
466 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copy with the same sync_char check.
468 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
469 while (in < (const __m256i *)limit) {
470 __m256i data = _mm256_load_si256(in);
471 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
472 __m256i found = _mm256_cmpeq_epi8(data, needle);
473 if (!_mm256_testz_si256(found, found)) {
// The new length is derived from how far the output pointer advanced.
480 current_frame->len = (uint8_t *)out - current_frame->data;
// 128-bit (SSE) variant of the same two loops.
483 const __m128i needle = _mm_set1_epi8(sync_char);
485 const __m128i *in = (const __m128i *)aligned_start;
486 if (current_frame->interleaved) {
487 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
488 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
489 if (current_frame->len % 2 == 1) {
// Split every 16-bit word into its low byte (mask) and high byte (shift),
// then pack the two 16-byte inputs back down to bytes.
493 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
494 while (in < (const __m128i *)limit) {
495 __m128i data1 = _mm_load_si128(in);
496 __m128i data2 = _mm_load_si128(in + 1);
497 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
498 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
499 __m128i data1_hi = _mm_srli_epi16(data1, 8);
500 __m128i data2_hi = _mm_srli_epi16(data2, 8);
501 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
502 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
503 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
504 _mm_storeu_si128(out2, hi);
505 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
506 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
// Any matching byte in this 32-byte chunk ends the fast path.
507 if (!_mm_testz_si128(found1, found1) ||
508 !_mm_testz_si128(found2, found2)) {
516 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 16-byte copy with the same sync_char check.
518 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
519 while (in < (const __m128i *)limit) {
520 __m128i data = _mm_load_si128(in);
521 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
522 __m128i found = _mm_cmpeq_epi8(data, needle);
523 if (!_mm_testz_si128(found, found)) {
530 current_frame->len = (uint8_t *)out - current_frame->data;
534 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Report how far into the input the fast path got.
536 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer and feeds their
// payload into *current_frame, splitting at every occurrence of the sync
// pattern.
//   xfr              the completed transfer
//   sync_pattern     byte sequence marking the start of a new frame/block
//   current_frame    frame currently being filled
//   frame_type_name  label passed on to add_to_frame() for its messages
//   start_callback   invoked with a pointer to the bytes following each
//                    sync pattern that is found
540 void decode_packs(const libusb_transfer *xfr,
541 const char *sync_pattern,
543 FrameAllocator::Frame *current_frame,
544 const char *frame_type_name,
545 function<void(const uint8_t *start)> start_callback)
548 for (int i = 0; i < xfr->num_iso_packets; i++) {
549 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
551 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
552 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only the bytes actually received (actual_length) are examined.
557 const uint8_t *start = xfr->buffer + offset;
558 const uint8_t *limit = start + pack->actual_length;
559 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path stops early when it sees the
// first byte of the sync pattern.
561 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
562 if (start == limit) break;
563 assert(start < limit);
// Slow path: search for the full sync pattern in what is left.
566 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
567 if (start_next_frame == nullptr) {
568 // add the rest of the buffer
569 add_to_frame(current_frame, frame_type_name, start, limit);
// Pattern found: everything before it belongs to the current frame.
572 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
573 start = start_next_frame + sync_length; // skip sync
574 start_callback(start);
578 dump_pack(xfr, offset, pack);
// Advance by the packet's allocated length (not actual_length) to reach the
// next packet's slot in the buffer.
580 offset += pack->length;
// libusb completion callback shared by all transfers set up in
// configure_card(). Isochronous transfers are decoded into the current
// audio/video frame; control transfers are register reads that get printed
// and stored in register_file. The transfer is handed back to libusb with
// libusb_submit_transfer() at the end.
584 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is reported and freed.
586 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
587 fprintf(stderr, "transfer status %d\n", xfr->status);
588 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture instance (set in configure_card()).
592 assert(xfr->user_data != nullptr);
593 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
595 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio with its 20-byte sync string; the other
// decode call handles video with the 00 00 ff ff marker.
596 if (xfr->endpoint == 0x84) {
597 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
599 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
602 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
603 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
// buf is the four bytes of register data returned by the control transfer.
604 uint8_t *buf = libusb_control_transfer_get_data(xfr);
606 if (setup->wIndex == 44) {
607 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
609 printf("read register %2d: 0x%02x%02x%02x%02x\n",
610 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the value and move on to the next register, wrapping after
// NUM_BMUSB_REGISTERS bytes.
613 memcpy(usb->register_file + usb->current_register, buf, 4);
614 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
615 if (usb->current_register == 0) {
616 // read through all of them
617 printf("register dump:");
618 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
619 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the setup packet so the next read targets current_register.
623 libusb_fill_control_setup(xfr->buffer,
624 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
625 /*index=*/usb->current_register, /*length=*/4);
// Raw hex dump of the transfer buffer.
630 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
631 for (i = 0; i < xfr->actual_length; i++) {
632 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so data keeps flowing.
642 int rc = libusb_submit_transfer(xfr);
644 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Body of the USB thread: requests SCHED_RR scheduling at priority 1 for
// the calling thread (only a message is printed if that fails), then pumps
// libusb events until should_quit is set.
// NOTE(review): "¶m" below looks like a mis-encoded "&param" (address of
// the sched_param structure) — confirm against the real file.
649 void BMUSBCapture::usb_thread_func()
652 memset(¶m, 0, sizeof(param));
653 param.sched_priority = 1;
654 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
655 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
// Event loop; each call lets libusb run completion callbacks such as cb_xfr.
657 while (!should_quit) {
658 int rc = libusb_handle_events(nullptr);
659 if (rc != LIBUSB_SUCCESS)
// One supported card found during enumeration in open_card(); it is filled
// there with the product id, bus number, port number and device pointer.
664 struct USBCardDevice {
// The libusb device this entry refers to; open_card() unreferences it when done.
667 libusb_device *device;
// Enumerates USB devices, keeps those with vendor 0x1edb and product
// 0xbd3b or 0xbd4f, sorts them into a stable order, lists them on stderr
// and opens the one at position card_index.
670 libusb_device_handle *open_card(int card_index)
672 libusb_device **devices;
673 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
674 if (num_devices == -1) {
675 fprintf(stderr, "Error finding USB devices\n");
678 vector<USBCardDevice> found_cards;
679 for (ssize_t i = 0; i < num_devices; ++i) {
680 libusb_device_descriptor desc;
681 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
682 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
686 uint8_t bus = libusb_get_bus_number(devices[i]);
687 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are unreferenced
// individually, since the list is freed below without unreferencing.
689 if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
690 !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
691 libusb_unref_device(devices[i]);
695 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free the list but keep the device references, so the
// entries in found_cards stay valid.
697 libusb_free_device_list(devices, 0);
699 // Sort the devices to get a consistent ordering.
700 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
701 if (a.product != b.product)
702 return a.product < b.product;
704 return a.bus < b.bus;
705 return a.port < b.port;
// NOTE(review): the value printed after "Device" is the port number.
708 for (size_t i = 0; i < found_cards.size(); ++i) {
709 fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
710 if (found_cards[i].product == 0xbd3b) {
711 fprintf(stderr, "Intensity Shuttle\n");
712 } else if (found_cards[i].product == 0xbd4f) {
713 fprintf(stderr, "UltraStudio SDI\n");
// The cast to size_t also makes a negative card_index fail this check.
719 if (size_t(card_index) >= found_cards.size()) {
720 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
724 libusb_device_handle *devh;
725 int rc = libusb_open(found_cards[card_index].device, &devh);
727 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Drop the references that were kept when the device list was freed.
731 for (size_t i = 0; i < found_cards.size(); ++i) {
732 libusb_unref_device(found_cards[i].device);
// Prepares everything needed before capture can start: installs default
// frame allocators if none were set, starts the dequeue thread, initializes
// libusb, opens the card and selects its configuration/interface/alternate
// settings, performs the initial register control transfers, and allocates
// the isochronous transfers, which are stored in iso_xfrs and submitted
// later by start_bm_capture().
738 void BMUSBCapture::configure_card()
740 if (video_frame_allocator == nullptr) {
741 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
743 if (audio_frame_allocator == nullptr) {
744 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// Start the thread that pairs queued video and audio and invokes the frame callback.
746 dequeue_thread_should_quit = false;
747 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
750 struct libusb_transfer *xfr;
752 rc = libusb_init(nullptr);
754 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
758 libusb_device_handle *devh = open_card(card_index);
760 fprintf(stderr, "Error finding USB device\n");
// Informational dump of the interfaces, alternate settings and endpoints
// of configuration 0.
764 libusb_config_descriptor *config;
765 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
767 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
770 printf("%d interface\n", config->bNumInterfaces);
771 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
772 printf(" interface %d\n", interface_number);
773 const libusb_interface *interface = &config->interface[interface_number];
774 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
775 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
776 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
777 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
778 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
779 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
784 rc = libusb_set_configuration(devh, /*configuration=*/1);
786 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
790 rc = libusb_claim_interface(devh, 0);
792 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
796 // Alternate setting 1 is output, alternate setting 2 is input.
797 // Card is reset when switching alternates, so the driver uses
798 // this “double switch” when it wants to reset.
800 // There's also alternate settings 3 and 4, which seem to be
801 // like 1 and 2 except they advertise less bandwidth needed.
802 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
804 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
807 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
809 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
813 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
815 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
821 rc = libusb_claim_interface(devh, 3);
823 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
829 // 44 is some kind of timer register (first 16 bits count upwards)
830 // 24 is some sort of watchdog?
831 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
832 // (or will go to 0x73c60010?), also seen 0x73c60100
833 // 12 also changes all the time, unclear why
834 // 16 seems to be autodetected mode somehow
835 // -- this is e00115e0 after reset?
836 // ed0115e0 after mode change [to output?]
837 // 2d0015e0 after more mode change [to input]
838 // ed0115e0 after more mode change
839 // 2d0015e0 after more mode change
841 // 390115e0 seems to indicate we have signal
842 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
844 // 200015e0 on startup
845 // changes to 250115e0 when we sync to the signal
847 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
849 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
851 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
852 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
854 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
855 // perhaps some of them are related to analog output?
857 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
858 // but the driver sets it to 0x8036802a at some point.
860 // all of this is on request 214/215. other requests (192, 219,
861 // 222, 223, 224) are used for firmware upgrade. Probably best to
862 // stay out of it unless you know what you're doing.
866 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
869 // 0x01 - stable signal
871 // 0x08 - unknown (audio??)
// Initial register accesses, executed in order by the loop below. Each
// entry is { direction, request, register index, data }; request 214 is
// used with the IN direction and 215 with OUT.
881 static const ctrl ctrls[] = {
882 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
883 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
885 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
887 // clearing the 0x08000000 bit seems to change the capture format (other source?)
888 // 0x10000000 = analog audio instead of embedded audio, it seems
889 // 0x3a000000 = component video? (analog audio)
890 // 0x3c000000 = composite video? (analog audio)
891 // 0x3e000000 = s-video? (analog audio)
892 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
893 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
894 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
895 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
896 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
899 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The 32-bit value is converted to network (big-endian) byte order before
// being copied into the four-byte transfer buffer.
900 uint32_t flipped = htonl(ctrls[req].data);
901 static uint8_t value[4];
902 memcpy(value, &flipped, sizeof(flipped));
903 int size = sizeof(value);
904 //if (ctrls[req].request == 215) size = 0;
// Synchronous control transfer; timeout 0 means no timeout.
905 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
906 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
908 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// On success rc is the number of bytes transferred; print that many bytes.
912 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
913 for (int i = 0; i < rc; ++i) {
914 printf("%02x", value[i]);
// Synchronous read (request 214) of the register at my_index, printed as hex.
922 static int my_index = 0;
923 static uint8_t value[4];
924 int size = sizeof(value);
925 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
926 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
928 fprintf(stderr, "Error on control\n");
931 printf("rc=%d index=%d: 0x", rc, my_index);
932 for (int i = 0; i < rc; ++i) {
933 printf("%02x", value[i]);
940 // set up an asynchronous transfer of the timer register
941 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
942 static int completed = 0;
944 xfr = libusb_alloc_transfer(0);
945 libusb_fill_control_setup(cmdbuf,
946 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
947 /*index=*/44, /*length=*/4);
// user_data is overwritten with this right after filling, because cb_xfr
// expects it to point at the BMUSBCapture instance.
948 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
949 xfr->user_data = this;
950 libusb_submit_transfer(xfr);
952 // set up an asynchronous transfer of register 24
953 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
954 static int completed2 = 0;
956 xfr = libusb_alloc_transfer(0);
957 libusb_fill_control_setup(cmdbuf2,
958 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
959 /*index=*/24, /*length=*/4);
960 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
961 xfr->user_data = this;
962 libusb_submit_transfer(xfr);
965 // set up an asynchronous transfer of the register dump
966 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
967 static int completed3 = 0;
969 xfr = libusb_alloc_transfer(0);
970 libusb_fill_control_setup(cmdbuf3,
971 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
972 /*index=*/current_register, /*length=*/4);
973 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
974 xfr->user_data = this;
// The submit call for this transfer is commented out.
975 //libusb_submit_transfer(xfr);
// Output file for dump_audio_block().
977 audiofp = fopen("audio.raw", "wb");
979 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4 are used; cb_xfr treats 0x84 as the audio endpoint.
980 for (int e = 3; e <= 4; ++e) {
981 //int num_transfers = (e == 3) ? 6 : 6;
982 int num_transfers = 10;
983 for (int i = 0; i < num_transfers; ++i) {
984 int num_iso_pack, size;
986 // Video seems to require isochronous packets scaled with the width;
987 // seemingly six lines is about right, rounded up to the required 1kB
989 size = WIDTH * 2 * 6;
990 // Note that for 10-bit input, you'll need to increase size accordingly.
991 //size = size * 4 / 3;
992 if (size % 1024 != 0) {
996 num_iso_pack = (2 << 16) / size; // 128 kB.
997 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer holds all packets of this transfer.
1002 int num_bytes = num_iso_pack * size;
1003 uint8_t *buf = new uint8_t[num_bytes];
1005 xfr = libusb_alloc_transfer(num_iso_pack);
1007 fprintf(stderr, "oom\n");
1011 int ep = LIBUSB_ENDPOINT_IN | e;
1012 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
1013 num_iso_pack, cb_xfr, nullptr, 0);
// Every packet in the transfer gets the same length.
1014 libusb_set_iso_packet_lengths(xfr, size);
1015 xfr->user_data = this;
// Kept for start_bm_capture(), which does the actual submission.
1016 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer prepared by configure_card() (stored
// in iso_xfrs); a failed submission is reported on stderr with the endpoint
// and the libusb error name.
1021 void BMUSBCapture::start_bm_capture()
1023 printf("starting capture\n");
1025 for (libusb_transfer *xfr : iso_xfrs) {
1026 printf("submitting transfer...\n");
1027 int rc = libusb_submit_transfer(xfr);
1030 //printf("num_bytes=%d\n", num_bytes);
1031 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1032 xfr->endpoint, i, libusb_error_name(rc));
// Teardown calls: release interface 0 and shut libusb down.
// NOTE(review): it is not clear from the visible lines whether these are
// compiled in or reached — confirm.
1039 libusb_release_interface(devh, 0);
1043 libusb_exit(nullptr);
// Asks the dequeue thread to exit and waits for it to finish.
1048 void BMUSBCapture::stop_dequeue_thread()
1050 dequeue_thread_should_quit = true;
// Wake the thread in case it is blocked waiting on queues_not_empty; its
// wait predicate also checks the quit flag.
1051 queues_not_empty.notify_all();
1052 dequeue_thread.join();
// Clears the global should_quit flag and starts the USB event thread
// running usb_thread_func (no object argument is passed to it).
1055 void BMUSBCapture::start_bm_thread()
1057 should_quit = false;
1058 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1061 void BMUSBCapture::stop_bm_thread()