1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Compile-time capture geometry and buffer sizing.
// NOTE(review): HEIGHT presumably counts all lines per frame, including the
// ancillary lines mentioned in the trailing comment -- confirm.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Bytes at the start of every video frame that are not picture data:
// dump_frame() skips them and frame_callback gets them as the video offset.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Same, for audio blocks (see dump_audio_block() and frame_callback).
42 #define AUDIO_HEADER_SIZE 4
// Buffer size used for the default video frame pool in configure_card().
// The exact per-format sizes are kept below for reference; a flat 8 MiB
// (8 << 20 bytes) is what is actually used.
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
46 #define FRAME_SIZE (8 << 20)
// Global stop flag; the USB event loop in usb_thread_func() runs until it is set.
51 atomic<bool> should_quit;
53 FrameAllocator::~FrameAllocator() {}
55 // Audio is more important than video, and also much cheaper.
56 // By having many more audio frames available, hopefully if something
57 // starts to drop, we'll have CPU load go down (from not having to
58 // process as much video) before we have to drop audio.
// These are the pool sizes configure_card() passes to MallocFrameAllocator
// when no video/audio allocator has been set.
59 #define NUM_QUEUED_VIDEO_FRAMES 16
60 #define NUM_QUEUED_AUDIO_FRAMES 64
62 class MallocFrameAllocator : public FrameAllocator {
64 MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
65 Frame alloc_frame() override;
66 void release_frame(Frame frame) override;
72 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
75 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
76 : frame_size(frame_size)
78 for (size_t i = 0; i < num_queued_frames; ++i) {
79 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
83 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
88 unique_lock<mutex> lock(freelist_mutex); // Meh.
89 if (freelist.empty()) {
90 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
93 vf.data = freelist.top().release();
95 freelist.pop(); // Meh.
100 void MallocFrameAllocator::release_frame(Frame frame)
102 if (frame.overflow > 0) {
103 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
105 unique_lock<mutex> lock(freelist_mutex);
106 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if <a> comes strictly before <b> on a 16-bit counter that
// wraps around, i.e. if stepping forward from a reaches b in fewer than
// 0x8000 steps. Equal values are never "less than".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	}
	// Forward distance from a to b modulo 2^16; the cast performs the wrap.
	const uint16_t forward_distance = static_cast<uint16_t>(b - a);
	return forward_distance < 0x8000;
}
121 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
123 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
124 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
125 q->back().timecode, timecode);
126 frame.owner->release_frame(frame);
132 qf.timecode = timecode;
136 unique_lock<mutex> lock(queue_lock);
137 q->push_back(move(qf));
139 queues_not_empty.notify_one(); // might be spurious
142 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
144 FILE *fp = fopen(filename, "wb");
145 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
146 printf("short write!\n");
151 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
153 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: matches queued video and audio frames by
// timecode and delivers them through frame_callback.
//
// If has_dequeue_callbacks is set, dequeue_init_callback() is called before
// the loop and dequeue_cleanup_callback() after it. The loop runs until
// dequeue_thread_should_quit is set.
156 void BMUSBCapture::dequeue_thread_func()
158 if (has_dequeue_callbacks) {
159 dequeue_init_callback();
161 while (!dequeue_thread_should_quit) {
162 unique_lock<mutex> lock(queue_lock);
// Sleep until asked to quit, or until both queues hold at least one frame.
163 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
// Compare the oldest entry of each queue using the wraparound-aware ordering.
165 uint16_t video_timecode = pending_video_frames.front().timecode;
166 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Video is older than the oldest audio: no partner will arrive, so recycle it.
167 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
168 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
170 video_frame_allocator->release_frame(pending_video_frames.front().frame);
171 pending_video_frames.pop_front();
// Audio is older than the oldest video: deliver it with an empty video frame.
172 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
173 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
175 QueuedFrame audio_frame = pending_audio_frames.front();
176 pending_audio_frames.pop_front();
178 frame_callback(audio_timecode,
179 FrameAllocator::Frame(), 0, 0x0000,
180 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Timecodes match: take both frames off their queues and deliver them together.
182 QueuedFrame video_frame = pending_video_frames.front();
183 QueuedFrame audio_frame = pending_audio_frames.front();
184 pending_audio_frames.pop_front();
185 pending_video_frames.pop_front();
// NOTE(review): the three lines below dump the pair to disk; they look like
// debug code -- confirm whether they are compiled in.
190 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
191 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
192 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The header sizes are passed as the offsets of the payload within each frame.
195 frame_callback(video_timecode,
196 video_frame.frame, HEADER_SIZE, video_frame.format,
197 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
200 if (has_dequeue_callbacks) {
201 dequeue_cleanup_callback();
// Called by decode_packs() when a video sync marker has been found; <start>
// points at the bytes right after the marker.
//
// Queues the video frame collected so far (if it has any data), tagged with
// the format/timecode read from <start>, and then allocates a fresh
// current_video_frame for the data that follows.
205 void BMUSBCapture::start_new_frame(const uint8_t *start)
// Both fields are 16-bit little-endian: timecode in bytes 0-1, format in bytes 2-3.
207 uint16_t format = (start[3] << 8) | start[2];
208 uint16_t timecode = (start[1] << 8) | start[0];
210 if (current_video_frame.len > 0) {
211 // If format is 0x0800 (no signal), add a fake (empty) audio
212 // frame to get it out of the queue.
213 // TODO: Figure out if there are other formats that come with
214 // no audio, and treat them the same.
215 if (format == 0x0800) {
216 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
217 if (fake_audio_frame.data == nullptr) {
218 // Oh well, it's just a no-signal frame anyway.
219 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
// Give the video buffer back and start over with a new one.
220 current_video_frame.owner->release_frame(current_video_frame);
221 current_video_frame = video_frame_allocator->alloc_frame();
224 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
227 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
229 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
231 // //start[7], start[6], start[5], start[4],
232 // read_current_frame, FRAME_SIZE);
// Subsequent video data is collected into a new buffer.
234 current_video_frame = video_frame_allocator->alloc_frame();
235 //if (current_video_frame.data == nullptr) {
236 // read_current_frame = -1;
238 // read_current_frame = 0;
// Called by decode_packs() when an audio sync marker has been found; <start>
// points at the bytes right after the marker.
//
// Queues the audio block collected so far (if it has any data), tagged with
// the format/timecode read from <start>, then allocates a fresh
// current_audio_frame.
242 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Both fields are 16-bit little-endian: timecode in bytes 0-1, format in bytes 2-3.
244 uint16_t format = (start[3] << 8) | start[2];
245 uint16_t timecode = (start[1] << 8) | start[0];
// NOTE(review): when len == 0 the previous frame is simply overwritten below
// with no release_frame() call -- confirm it can never hold a pool buffer.
246 if (current_audio_frame.len > 0) {
247 //dump_audio_block();
248 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
250 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
251 // format, timecode, read_current_audio_block);
252 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//
// xfr:    the transfer whose buffer holds the packet.
// offset: byte offset of this packet within xfr->buffer.
// pack:   the packet descriptor; only actual_length bytes are printed.
256 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
258 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
259 for (unsigned j = 0; j < pack->actual_length; j++) {
260 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
261 printf("%02x", xfr->buffer[j + offset]);
// Something extra is emitted after every eighth byte (presumably a separator).
264 else if ((j % 8) == 7)
// De-interleaves <n> bytes from <src> into two destinations: bytes at even
// offsets go to <dest1>, bytes at odd offsets go to <dest2>.
//
// <n> must be even; each destination receives n / 2 bytes.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	const size_t num_pairs = n / 2;
	for (size_t i = 0; i < num_pairs; ++i) {
		dest1[i] = src[2 * i];
		dest2[i] = src[2 * i + 1];
	}
}
// Slow-path append: copies the bytes in [start, end) onto the end of
// *current_frame and advances its len.
//
// current_frame:   frame being filled; skipped if it has no buffer or its
//                  len is already past size.
// frame_type_name: label used only in the overflow message.
//
// Data that does not fit is not copied; the excess is recorded in
// current_frame->overflow instead. For interleaved frames the input is
// split byte-by-byte between data and data2.
284 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
286 if (current_frame->data == nullptr ||
287 current_frame->len > current_frame->size ||
292 int bytes = end - start;
// Not enough room: remember by how much we overflowed and mark the frame full.
293 if (current_frame->len + bytes > current_frame->size) {
294 current_frame->overflow = current_frame->len + bytes - current_frame->size;
295 current_frame->len = current_frame->size;
// Overflows larger than 1 MiB are reported right away and then reset.
296 if (current_frame->overflow > 1048576) {
297 printf("%d bytes overflow after last %s frame\n",
298 int(current_frame->overflow), frame_type_name);
299 current_frame->overflow = 0;
303 if (current_frame->interleaved) {
// Each plane holds half of the bytes received so far.
304 uint8_t *data = current_frame->data + current_frame->len / 2;
305 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length / odd byte count get special handling before the
// pairwise copy below, which needs an even count.
306 if (current_frame->len % 2 == 1) {
310 if (bytes % 2 == 1) {
313 ++current_frame->len;
316 memcpy_interleaved(data, data2, start, bytes);
317 current_frame->len += bytes;
// Non-interleaved frames take a plain contiguous copy.
319 memcpy(current_frame->data + current_frame->len, start, bytes);
320 current_frame->len += bytes;
// Debug helper: prints <name> (left-justified in a 10-character field)
// followed by the 32 bytes of the AVX2 register <n> as two-digit hex values,
// lowest byte first. The extract index must be a compile-time constant,
// which is why every byte gets its own statement.
328 void avx2_dump(const char *name, __m256i n)
330 printf("%-10s:", name);
// Bytes 0-7.
331 printf(" %02x", _mm256_extract_epi8(n, 0));
332 printf(" %02x", _mm256_extract_epi8(n, 1));
333 printf(" %02x", _mm256_extract_epi8(n, 2));
334 printf(" %02x", _mm256_extract_epi8(n, 3));
335 printf(" %02x", _mm256_extract_epi8(n, 4));
336 printf(" %02x", _mm256_extract_epi8(n, 5));
337 printf(" %02x", _mm256_extract_epi8(n, 6));
338 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
340 printf(" %02x", _mm256_extract_epi8(n, 8));
341 printf(" %02x", _mm256_extract_epi8(n, 9));
342 printf(" %02x", _mm256_extract_epi8(n, 10));
343 printf(" %02x", _mm256_extract_epi8(n, 11));
344 printf(" %02x", _mm256_extract_epi8(n, 12));
345 printf(" %02x", _mm256_extract_epi8(n, 13));
346 printf(" %02x", _mm256_extract_epi8(n, 14));
347 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
349 printf(" %02x", _mm256_extract_epi8(n, 16));
350 printf(" %02x", _mm256_extract_epi8(n, 17));
351 printf(" %02x", _mm256_extract_epi8(n, 18));
352 printf(" %02x", _mm256_extract_epi8(n, 19));
353 printf(" %02x", _mm256_extract_epi8(n, 20));
354 printf(" %02x", _mm256_extract_epi8(n, 21));
355 printf(" %02x", _mm256_extract_epi8(n, 22));
356 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
358 printf(" %02x", _mm256_extract_epi8(n, 24));
359 printf(" %02x", _mm256_extract_epi8(n, 25));
360 printf(" %02x", _mm256_extract_epi8(n, 26));
361 printf(" %02x", _mm256_extract_epi8(n, 27));
362 printf(" %02x", _mm256_extract_epi8(n, 28));
363 printf(" %02x", _mm256_extract_epi8(n, 29));
364 printf(" %02x", _mm256_extract_epi8(n, 30));
365 printf(" %02x", _mm256_extract_epi8(n, 31));
370 // Does a memcpy and memchr in one to reduce processing time.
371 // Note that the benefit is somewhat limited if your L3 cache is small,
372 // as you'll (unfortunately) spend most of the time loading the data
375 // Complicated cases are left to the slow path; it basically stops copying
376 // up until the first instance of "sync_char" (usually a bit before, actually).
377 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
378 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// current_frame: frame being filled (plain or interleaved into data/data2).
// start, limit:  input range [start, limit).
// sync_char:     first byte of the sync pattern; copying stops at the
//                vector-sized block that contains it.
// Returns a pointer into the input marking how far the copy got; the caller
// (decode_packs) continues from there on the slow path.
379 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
381 if (current_frame->data == nullptr ||
382 current_frame->len > current_frame->size ||
// Small inputs are special-cased.
386 size_t orig_bytes = limit - start;
387 if (orig_bytes < 128) {
392 // Don't read more bytes than we can write.
393 limit = min(limit, start + (current_frame->size - current_frame->len));
395 // Align end to 32 bytes.
396 limit = (const uint8_t *)(intptr_t(limit) & ~31);
398 if (start >= limit) {
402 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
403 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
404 if (aligned_start != start) {
// The unaligned head goes through the slow path; if it already contains
// sync_char, only the part before it is added.
405 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
406 if (sync_start == nullptr) {
407 add_to_frame(current_frame, "", start, aligned_start);
409 add_to_frame(current_frame, "", start, sync_start);
414 // Make the length a multiple of 64.
415 if (current_frame->interleaved) {
416 if (((limit - aligned_start) % 64) != 0) {
419 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit (AVX2) variant ----
423 const __m256i needle = _mm256_set1_epi8(sync_char);
425 const __restrict __m256i *in = (const __m256i *)aligned_start;
426 if (current_frame->interleaved) {
// Output positions in the two planes for the current (possibly odd) length.
427 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
428 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
429 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: gather the even-indexed bytes into the low half and the
// odd-indexed bytes into the high half.
433 __m256i shuffle_cw = _mm256_set_epi8(
434 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
435 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
// Each iteration reads two 32-byte input vectors (64 bytes) and writes one
// 32-byte vector to each plane.
436 while (in < (const __m256i *)limit) {
437 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
438 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
439 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Non-zero in <found> means sync_char occurs somewhere in these 64 bytes.
441 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
442 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
443 __m256i found = _mm256_or_si256(found1, found2);
445 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
446 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
448 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
449 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo = low 128-bit halves of data1/data2, hi = their high halves.
451 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
452 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
454 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
455 _mm256_storeu_si256(out2, hi);
457 if (!_mm256_testz_si256(found, found)) {
// Account for everything consumed from the aligned input.
465 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copy with the same sync_char check.
467 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
468 while (in < (const __m256i *)limit) {
469 __m256i data = _mm256_load_si256(in);
470 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
471 __m256i found = _mm256_cmpeq_epi8(data, needle);
472 if (!_mm256_testz_si256(found, found)) {
479 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit (SSE) variant of the same two loops ----
482 const __m128i needle = _mm_set1_epi8(sync_char);
484 const __m128i *in = (const __m128i *)aligned_start;
485 if (current_frame->interleaved) {
486 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
487 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
488 if (current_frame->len % 2 == 1) {
// Split each 16-bit word into its low and high byte, then pack the low
// bytes of both inputs into one vector and the high bytes into another.
492 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
493 while (in < (const __m128i *)limit) {
494 __m128i data1 = _mm_load_si128(in);
495 __m128i data2 = _mm_load_si128(in + 1);
496 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
497 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
498 __m128i data1_hi = _mm_srli_epi16(data1, 8);
499 __m128i data2_hi = _mm_srli_epi16(data2, 8);
500 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
501 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
502 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
503 _mm_storeu_si128(out2, hi);
504 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
505 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
506 if (!_mm_testz_si128(found1, found1) ||
507 !_mm_testz_si128(found2, found2)) {
515 current_frame->len += (uint8_t *)in - aligned_start;
517 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
518 while (in < (const __m128i *)limit) {
519 __m128i data = _mm_load_si128(in);
520 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
521 __m128i found = _mm_cmpeq_epi8(data, needle);
522 if (!_mm_testz_si128(found, found)) {
529 current_frame->len = (uint8_t *)out - current_frame->data;
533 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Hand back the input position reached by the vector loop.
535 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer, appends their
// payload to *current_frame and splits the stream at every sync pattern.
//
// xfr:             the completed transfer.
// sync_pattern:    byte sequence that marks the start of a new frame/block
//                  (sync_length bytes long).
// current_frame:   frame currently being filled.
// frame_type_name: label forwarded to add_to_frame() for messages.
// start_callback:  called with a pointer to the bytes right after each sync
//                  pattern found.
539 void decode_packs(const libusb_transfer *xfr,
540 const char *sync_pattern,
542 FrameAllocator::Frame *current_frame,
543 const char *frame_type_name,
544 function<void(const uint8_t *start)> start_callback)
547 for (int i = 0; i < xfr->num_iso_packets; i++) {
548 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete are reported.
550 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
551 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet's slot hold received data.
556 const uint8_t *start = xfr->buffer + offset;
557 const uint8_t *limit = start + pack->actual_length;
558 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path stops near the first byte
// that could begin a sync pattern.
560 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
561 if (start == limit) break;
562 assert(start < limit);
// Slow path: search the remainder for the complete sync pattern.
565 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
566 if (start_next_frame == nullptr) {
567 // add the rest of the buffer
568 add_to_frame(current_frame, frame_type_name, start, limit);
// Found one: finish the current frame just before it, then start the next.
571 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
572 start = start_next_frame + sync_length; // skip sync
573 start_callback(start);
577 dump_pack(xfr, offset, pack);
// Packet slots in the buffer are pack->length apart, regardless of how much
// data was actually received.
579 offset += pack->length;
// libusb completion callback shared by all transfers submitted in this file.
//
// Failed transfers are reported and freed. Isochronous transfers are decoded
// into audio or video frames depending on the endpoint; control transfers
// carry register reads. The last visible step re-submits the transfer.
// xfr->user_data must point to the owning BMUSBCapture.
583 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
585 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
586 fprintf(stderr, "transfer status %d\n", xfr->status);
587 libusb_free_transfer(xfr);
591 assert(xfr->user_data != nullptr);
592 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
594 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio, delimited by a 20-byte text marker; the other
// decode call handles video, delimited by the 4-byte 00 00 ff ff marker.
595 if (xfr->endpoint == 0x84) {
596 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
598 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
601 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
602 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
603 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): the prints below use <setup>, whose declaration is commented
// out above -- presumably they are compiled out; confirm.
605 if (setup->wIndex == 44) {
606 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
608 printf("read register %2d: 0x%02x%02x%02x%02x\n",
609 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// at NUM_BMUSB_REGISTERS.
612 memcpy(usb->register_file + usb->current_register, buf, 4);
613 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
614 if (usb->current_register == 0) {
615 // read through all of them
616 printf("register dump:");
617 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
618 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the same transfer to read the next register.
622 libusb_fill_control_setup(xfr->buffer,
623 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
624 /*index=*/usb->current_register, /*length=*/4);
629 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
630 for (i = 0; i < xfr->actual_length; i++) {
631 printf("%02x", xfr->buffer[i]);
// Put the transfer back in flight.
641 if (libusb_submit_transfer(xfr) < 0) {
642 fprintf(stderr, "error re-submitting URB\n");
647 void BMUSBCapture::usb_thread_func()
650 memset(¶m, 0, sizeof(param));
651 param.sched_priority = 1;
652 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
653 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
655 while (!should_quit) {
656 int rc = libusb_handle_events(nullptr);
657 if (rc != LIBUSB_SUCCESS)
// One-time setup of the capture card.
//
// Visible steps, in order: install default frame allocators if none were
// set, start the dequeue thread, initialize libusb and open the device by
// vid/pid, print the interface/endpoint layout, select configuration 1 and
// claim interface 0, switch alternate settings, run a short list of vendor
// control requests, and finally allocate (but not submit) the isochronous
// transfers, which are stored in iso_xfrs for start_bm_capture().
662 void BMUSBCapture::configure_card()
664 if (video_frame_allocator == nullptr) {
665 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
667 if (audio_frame_allocator == nullptr) {
668 set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
// The consumer thread is started before any USB traffic is set up.
670 dequeue_thread_should_quit = false;
671 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
674 struct libusb_transfer *xfr;
676 rc = libusb_init(nullptr);
678 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
682 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
683 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
684 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
686 fprintf(stderr, "Error finding USB device\n");
// Informational only: list every interface, alternate setting and endpoint
// of configuration index 0.
690 libusb_config_descriptor *config;
691 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
693 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
696 printf("%d interface\n", config->bNumInterfaces);
697 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
698 printf(" interface %d\n", interface_number);
699 const libusb_interface *interface = &config->interface[interface_number];
700 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
701 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
702 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
703 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
704 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
705 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
710 rc = libusb_set_configuration(devh, /*configuration=*/1);
712 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
716 rc = libusb_claim_interface(devh, 0);
718 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
722 // Alternate setting 1 is output, alternate setting 2 is input.
723 // Card is reset when switching alternates, so the driver uses
724 // this “double switch” when it wants to reset.
726 // There's also alternate settings 3 and 4, which seem to be
727 // like 1 and 2 except they advertise less bandwidth needed.
728 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
730 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// NOTE(review): this call selects alternate setting 2, but the message after
// it still says "alternate 1".
733 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
735 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// NOTE(review): switching back to alternate 1 (output, per the comment above)
// and claiming interface 3 look like disabled experiments -- confirm.
739 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
741 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
747 rc = libusb_claim_interface(devh, 3);
749 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers (accessed through vendor
// requests 214 = read and 215 = write, as used below).
755 // 44 is some kind of timer register (first 16 bits count upwards)
756 // 24 is some sort of watchdog?
757 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
758 // (or will go to 0x73c60010?), also seen 0x73c60100
759 // 12 also changes all the time, unclear why
760 // 16 seems to be autodetected mode somehow
761 // -- this is e00115e0 after reset?
762 // ed0115e0 after mode change [to output?]
763 // 2d0015e0 after more mode change [to input]
764 // ed0115e0 after more mode change
765 // 2d0015e0 after more mode change
767 // 390115e0 seems to indicate we have signal
768 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
770 // 200015e0 on startup
771 // changes to 250115e0 when we sync to the signal
773 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
775 // Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
777 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
778 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
780 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
781 // perhaps some of them are related to analog output?
783 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
784 // but the driver sets it to 0x8036802a at some point.
786 // all of this is on request 214/215. other requests (192, 219,
787 // 222, 223, 224) are used for firmware upgrade. Probably best to
788 // stay out of it unless you know what you're doing.
792 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
795 // 0x01 - stable signal
797 // 0x08 - unknown (audio??)
// Startup sequence of vendor control requests; each entry is
// { direction, request, register index, 32-bit value }.
807 static const ctrl ctrls[] = {
808 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
809 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
811 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
813 // clearing the 0x08000000 bit seems to change the capture format (other source?)
814 // 0x10000000 = analog audio instead of embedded audio, it seems
815 // 0x3a000000 = component video? (analog audio)
816 // 0x3c000000 = composite video? (analog audio)
817 // 0x3e000000 = s-video? (analog audio)
818 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
819 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
820 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
821 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
822 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
// Issue each request synchronously. The value is sent in network (big-endian)
// byte order, and the bytes transferred are printed afterwards.
825 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
826 uint32_t flipped = htonl(ctrls[req].data);
827 static uint8_t value[4];
828 memcpy(value, &flipped, sizeof(flipped));
829 int size = sizeof(value);
830 //if (ctrls[req].request == 215) size = 0;
831 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
832 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
834 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
838 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
839 for (int i = 0; i < rc; ++i) {
840 printf("%02x", value[i]);
// NOTE(review): the register-polling code from here down to the audio.raw
// fopen looks like debugging aids -- confirm which parts are compiled in.
848 static int my_index = 0;
849 static uint8_t value[4];
850 int size = sizeof(value);
851 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
852 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
854 fprintf(stderr, "Error on control\n");
857 printf("rc=%d index=%d: 0x", rc, my_index);
858 for (int i = 0; i < rc; ++i) {
859 printf("%02x", value[i]);
866 // set up an asynchronous transfer of the timer register
867 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
868 static int completed = 0;
870 xfr = libusb_alloc_transfer(0);
871 libusb_fill_control_setup(cmdbuf,
872 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
873 /*index=*/44, /*length=*/4);
874 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// user_data is overwritten with <this>, which is what cb_xfr expects.
875 xfr->user_data = this;
876 libusb_submit_transfer(xfr);
878 // set up an asynchronous transfer of register 24
879 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
880 static int completed2 = 0;
882 xfr = libusb_alloc_transfer(0);
883 libusb_fill_control_setup(cmdbuf2,
884 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
885 /*index=*/24, /*length=*/4);
886 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
887 xfr->user_data = this;
888 libusb_submit_transfer(xfr);
891 // set up an asynchronous transfer of the register dump
892 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
893 static int completed3 = 0;
895 xfr = libusb_alloc_transfer(0);
896 libusb_fill_control_setup(cmdbuf3,
897 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
898 /*index=*/current_register, /*length=*/4);
899 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
900 xfr->user_data = this;
901 //libusb_submit_transfer(xfr);
// Output stream used by dump_audio_block().
903 audiofp = fopen("audio.raw", "wb");
905 // set up isochronous transfers for audio and video
// Endpoints 3 and 4 (IN), six transfers each.
906 for (int e = 3; e <= 4; ++e) {
907 //int num_transfers = (e == 3) ? 6 : 6;
908 int num_transfers = 6;
909 for (int i = 0; i < num_transfers; ++i) {
910 int num_iso_pack, size;
912 // Video seems to require isochronous packets scaled with the width;
913 // seemingly six lines is about right, rounded up to the required 1kB
915 size = WIDTH * 2 * 6;
916 // Note that for 10-bit input, you'll need to increase size accordingly.
917 //size = size * 4 / 3;
918 if (size % 1024 != 0) {
// (2 << 18) is 512 kB, so this is the number of packets that fit in that much.
922 num_iso_pack = (2 << 18) / size; // 512 kB.
923 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer per transfer, with room for every packet.
928 int num_bytes = num_iso_pack * size;
929 uint8_t *buf = new uint8_t[num_bytes];
931 xfr = libusb_alloc_transfer(num_iso_pack);
933 fprintf(stderr, "oom\n");
937 int ep = LIBUSB_ENDPOINT_IN | e;
938 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
939 num_iso_pack, cb_xfr, nullptr, 0);
940 libusb_set_iso_packet_lengths(xfr, size);
941 xfr->user_data = this;
// Submission is deferred to start_bm_capture().
942 iso_xfrs.push_back(xfr);
// Starts the capture by submitting every isochronous transfer that
// configure_card() prepared in iso_xfrs. A failed submission is reported
// with the endpoint, the transfer's running number and the libusb error name.
947 void BMUSBCapture::start_bm_capture()
949 printf("starting capture\n");
951 for (libusb_transfer *xfr : iso_xfrs) {
952 printf("submitting transfer...\n");
953 int rc = libusb_submit_transfer(xfr);
956 //printf("num_bytes=%d\n", num_bytes);
957 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
958 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): the teardown calls below use <devh>, which is a local of
// configure_card() -- presumably this is disabled code; confirm.
965 libusb_release_interface(devh, 0);
969 libusb_exit(nullptr);
974 void BMUSBCapture::stop_dequeue_thread()
976 dequeue_thread_should_quit = true;
977 queues_not_empty.notify_all();
978 dequeue_thread.join();
981 void BMUSBCapture::start_bm_thread()
984 usb_thread = thread(&BMUSBCapture::usb_thread_func);
987 void BMUSBCapture::stop_bm_thread()