1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
46 #define FRAME_SIZE (8 << 20)
51 atomic<bool> should_quit;
53 FrameAllocator::~FrameAllocator() {}
55 #define NUM_QUEUED_FRAMES 16
56 class MallocFrameAllocator : public FrameAllocator {
58 MallocFrameAllocator(size_t frame_size);
59 Frame alloc_frame() override;
60 void release_frame(Frame frame) override;
66 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70 : frame_size(frame_size)
72 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Takes one buffer off the preallocated freelist, holding freelist_mutex
// while the freelist is inspected and modified. When the freelist is empty,
// a "Frame overrun" message is printed instead of handing out a buffer.
//
// NOTE(review): this listing has gaps (see the embedded line numbers), so the
// construction and return of the Frame are not visible here.
// NOTE(review): the message uses "%ld" for a size; if the argument is the
// size_t frame_size, "%zu" is the matching conversion. Confirm.
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
82 unique_lock<mutex> lock(freelist_mutex); // Meh.
83 if (freelist.empty()) {
84 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the raw buffer from its unique_ptr before the (now
// empty) unique_ptr is popped, so the buffer is not freed by the pop.
87 vf.data = freelist.top().release();
89 freelist.pop(); // Meh.
94 void MallocFrameAllocator::release_frame(Frame frame)
96 if (frame.overflow > 0) {
97 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
99 unique_lock<mutex> lock(freelist_mutex);
100 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if <a> comes strictly before <b> on a 16-bit counter that
// wraps around from 0xffff to 0x0000. <b> counts as "after" <a> when it is
// between 1 and 0x7fff steps ahead of it (modulo 0x10000); equal values are
// not "less than".
//
// The subtraction is done modulo 2^16, which covers both the a < b case
// (b - a) and the wrapped case (0x10000 + b - a) in a single expression.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	}
	const uint16_t distance = uint16_t(b - a);
	return distance < 0x8000;
}
// Appends a frame to the queue <q> and signals queues_not_empty.
// If the queue is non-empty and <timecode> is not strictly after the
// timecode of the last queued entry (wraparound-aware comparison), the frame
// is reported as "going backwards" and handed back to its owning allocator
// instead.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// construction of <qf> and the exit from the "dropped" branch are not
// visible here.
// NOTE(review): the q->empty()/q->back() check below comes before queue_lock
// is taken further down. Confirm that no other thread can modify <q> at that
// point.
115 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
117 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
118 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
119 q->back().timecode, timecode);
120 frame.owner->release_frame(frame);
126 qf.timecode = timecode;
// The push itself happens under queue_lock.
130 unique_lock<mutex> lock(queue_lock);
131 q->push_back(move(qf));
133 queues_not_empty.notify_one(); // might be spurious
136 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
138 FILE *fp = fopen(filename, "wb");
139 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
140 printf("short write!\n");
145 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
147 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up queued video and audio frames by
// timecode and hands each matching pair to frame_callback.
// If has_dequeue_callbacks is set, dequeue_init_callback() is called before
// the loop and dequeue_cleanup_callback() after it.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); in
// particular, what happens between the wait and the first front() access when
// the wait returns because of dequeue_thread_should_quit is not visible here.
150 void BMUSBCapture::dequeue_thread_func()
152 if (has_dequeue_callbacks) {
153 dequeue_init_callback();
155 while (!dequeue_thread_should_quit) {
156 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues have at least one frame, or we are asked to quit.
157 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
159 uint16_t video_timecode = pending_video_frames.front().timecode;
160 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Whichever queue has the older head drops that head, so the queues can
// catch up with each other.
// NOTE(review): these are plain "<" comparisons on 16-bit timecodes, while
// queue_frame() uses uint16_less_than_with_wraparound(); confirm the
// behavior when the timecode wraps.
161 if (video_timecode < audio_timecode) {
162 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
164 video_frame_allocator->release_frame(pending_video_frames.front().frame);
165 pending_video_frames.pop_front();
166 } else if (audio_timecode < video_timecode) {
167 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
169 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
170 pending_audio_frames.pop_front();
// Timecodes match: take both heads off their queues.
172 QueuedFrame video_frame = pending_video_frames.front();
173 QueuedFrame audio_frame = pending_audio_frames.front();
174 pending_audio_frames.pop_front();
175 pending_video_frames.pop_front();
// Dump of the pair to disk: the video frame goes to a file named after its
// format and timecode, the audio block through dump_audio_block().
180 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
181 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
182 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The callback gets the frames together with the header sizes as offsets.
185 frame_callback(video_timecode,
186 video_frame.frame, HEADER_SIZE, video_frame.format,
187 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
190 if (has_dequeue_callbacks) {
191 dequeue_cleanup_callback();
// Called with a pointer to the bytes following a video sync marker.
// Reads a little-endian 16-bit timecode from bytes 0-1 and a 16-bit format
// from bytes 2-3, queues the video frame accumulated so far (if it has any
// data), and then allocates a fresh current_video_frame.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// exact nesting of the branches below is not fully visible here.
195 void BMUSBCapture::start_new_frame(const uint8_t *start)
197 uint16_t format = (start[3] << 8) | start[2];
198 uint16_t timecode = (start[1] << 8) | start[0];
200 if (current_video_frame.len > 0) {
201 // If format is 0x0800 (no signal), add a fake (empty) audio
202 // frame to get it out of the queue.
203 // TODO: Figure out if there are other formats that come with
204 // no audio, and treat them the same.
205 if (format == 0x0800) {
206 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
207 if (fake_audio_frame.data == nullptr) {
208 // Oh well, it's just a no-signal frame anyway.
209 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
// Give the video frame back to its allocator and start over with a new one.
210 current_video_frame.owner->release_frame(current_video_frame);
211 current_video_frame = video_frame_allocator->alloc_frame();
214 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
217 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
219 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
221 // //start[7], start[6], start[5], start[4],
222 // read_current_frame, FRAME_SIZE);
224 current_video_frame = video_frame_allocator->alloc_frame();
225 //if (current_video_frame.data == nullptr) {
226 // read_current_frame = -1;
228 // read_current_frame = 0;
// Called with a pointer to the bytes following an audio sync marker.
// Reads a little-endian 16-bit timecode from bytes 0-1 and a 16-bit format
// from bytes 2-3, queues the audio block accumulated so far (if it has any
// data) under that format/timecode, and allocates a fresh
// current_audio_frame.
232 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
234 uint16_t format = (start[3] << 8) | start[2];
235 uint16_t timecode = (start[1] << 8) | start[0];
236 if (current_audio_frame.len > 0) {
237 //dump_audio_block();
238 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
240 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
241 // format, timecode, read_current_audio_block);
242 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
// The packet's data is read from xfr->buffer starting at <offset>, and
// pack->actual_length bytes are printed.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// separators printed between groups of bytes are only partly visible here.
246 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
248 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
249 for (unsigned j = 0; j < pack->actual_length; j++) {
250 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
251 printf("%02x", xfr->buffer[j + offset]);
254 else if ((j % 8) == 7)
// Splits <n> bytes from <src> into two planes: bytes at even offsets go to
// <dest1>, bytes at odd offsets go to <dest2>. This is the same even/odd
// split that the SIMD paths in add_to_frame_fastpath() produce.
//
// <n> is expected to be even (add_to_frame() deals with odd bytes before
// calling this). Only whole pairs are copied, so a trailing odd byte is left
// alone rather than reading past the end of <src>.
//
// NOTE(review): the loop body was missing from this listing and is restored
// from the function's name and its callers; confirm against the original.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	const size_t num_pairs = n / 2;
	for (size_t pair = 0; pair < num_pairs; ++pair) {
		dest1[pair] = src[2 * pair];
		dest2[pair] = src[2 * pair + 1];
	}
}
// Appends the bytes in [start, end) to <current_frame>, advancing its len.
// If the bytes do not fit in the frame's size, the excess is recorded in
// current_frame->overflow and len is clamped to size. For interleaved
// frames, the data is split between the data and data2 planes (each indexed
// by len / 2); otherwise it is a plain memcpy onto the end of data.
//
// frame_type_name is only used in the overflow message.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// rest of the initial guard condition and the handling of odd lengths in the
// interleaved case are not fully visible here.
274 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
276 if (current_frame->data == nullptr ||
277 current_frame->len > current_frame->size ||
282 int bytes = end - start;
283 if (current_frame->len + bytes > current_frame->size) {
284 current_frame->overflow = current_frame->len + bytes - current_frame->size;
285 current_frame->len = current_frame->size;
// Overflows of more than 1 MiB are printed and then reset to zero.
286 if (current_frame->overflow > 1048576) {
287 printf("%d bytes overflow after last %s frame\n",
288 int(current_frame->overflow), frame_type_name);
289 current_frame->overflow = 0;
293 if (current_frame->interleaved) {
294 uint8_t *data = current_frame->data + current_frame->len / 2;
295 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
296 if (current_frame->len % 2 == 1) {
300 if (bytes % 2 == 1) {
303 ++current_frame->len;
306 memcpy_interleaved(data, data2, start, bytes);
307 current_frame->len += bytes;
309 memcpy(current_frame->data + current_frame->len, start, bytes);
310 current_frame->len += bytes;
// Debug helper: prints <name> (left-aligned, padded to 10 characters)
// followed by the 32 bytes of the AVX2 register <n> as two-digit hex, byte 0
// first. The extracts are written out one by one instead of in a loop.
//
// NOTE(review): this listing has gaps (see the embedded line numbers) after
// every eighth byte and at the end; whatever is printed there is not visible
// here.
318 void avx2_dump(const char *name, __m256i n)
320 printf("%-10s:", name);
321 printf(" %02x", _mm256_extract_epi8(n, 0));
322 printf(" %02x", _mm256_extract_epi8(n, 1));
323 printf(" %02x", _mm256_extract_epi8(n, 2));
324 printf(" %02x", _mm256_extract_epi8(n, 3));
325 printf(" %02x", _mm256_extract_epi8(n, 4));
326 printf(" %02x", _mm256_extract_epi8(n, 5));
327 printf(" %02x", _mm256_extract_epi8(n, 6));
328 printf(" %02x", _mm256_extract_epi8(n, 7));
330 printf(" %02x", _mm256_extract_epi8(n, 8));
331 printf(" %02x", _mm256_extract_epi8(n, 9));
332 printf(" %02x", _mm256_extract_epi8(n, 10));
333 printf(" %02x", _mm256_extract_epi8(n, 11));
334 printf(" %02x", _mm256_extract_epi8(n, 12));
335 printf(" %02x", _mm256_extract_epi8(n, 13));
336 printf(" %02x", _mm256_extract_epi8(n, 14));
337 printf(" %02x", _mm256_extract_epi8(n, 15));
339 printf(" %02x", _mm256_extract_epi8(n, 16));
340 printf(" %02x", _mm256_extract_epi8(n, 17));
341 printf(" %02x", _mm256_extract_epi8(n, 18));
342 printf(" %02x", _mm256_extract_epi8(n, 19));
343 printf(" %02x", _mm256_extract_epi8(n, 20));
344 printf(" %02x", _mm256_extract_epi8(n, 21));
345 printf(" %02x", _mm256_extract_epi8(n, 22));
346 printf(" %02x", _mm256_extract_epi8(n, 23));
348 printf(" %02x", _mm256_extract_epi8(n, 24));
349 printf(" %02x", _mm256_extract_epi8(n, 25));
350 printf(" %02x", _mm256_extract_epi8(n, 26));
351 printf(" %02x", _mm256_extract_epi8(n, 27));
352 printf(" %02x", _mm256_extract_epi8(n, 28));
353 printf(" %02x", _mm256_extract_epi8(n, 29));
354 printf(" %02x", _mm256_extract_epi8(n, 30));
355 printf(" %02x", _mm256_extract_epi8(n, 31));
360 // Does a memcpy and memchr in one to reduce processing time.
361 // Note that the benefit is somewhat limited if your L3 cache is small,
362 // as you'll (unfortunately) spend most of the time loading the data
365 // Complicated cases are left to the slow path; the fast path simply stops copying
366 // at the first instance of "sync_char" (usually a bit before it, actually).
367 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
368 // data, and what we really need this for is the 00 00 ff ff marker in video data.
369 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
371 if (current_frame->data == nullptr ||
372 current_frame->len > current_frame->size ||
376 size_t orig_bytes = limit - start;
377 if (orig_bytes < 128) {
382 // Don't read more bytes than we can write.
383 limit = min(limit, start + (current_frame->size - current_frame->len));
385 // Align end to 32 bytes.
386 limit = (const uint8_t *)(intptr_t(limit) & ~31);
388 if (start >= limit) {
392 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
393 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
394 if (aligned_start != start) {
395 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
396 if (sync_start == nullptr) {
397 add_to_frame(current_frame, "", start, aligned_start);
399 add_to_frame(current_frame, "", start, sync_start);
404 // Make the length a multiple of 64.
405 if (current_frame->interleaved) {
406 if (((limit - aligned_start) % 64) != 0) {
409 assert(((limit - aligned_start) % 64) == 0);
413 const __m256i needle = _mm256_set1_epi8(sync_char);
415 const __restrict __m256i *in = (const __m256i *)aligned_start;
416 if (current_frame->interleaved) {
417 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
418 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
419 if (current_frame->len % 2 == 1) {
423 __m256i shuffle_cw = _mm256_set_epi8(
424 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
425 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
426 while (in < (const __m256i *)limit) {
427 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
428 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
429 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
431 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
432 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
433 __m256i found = _mm256_or_si256(found1, found2);
435 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
436 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
438 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
439 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
441 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
442 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
444 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
445 _mm256_storeu_si256(out2, hi);
447 if (!_mm256_testz_si256(found, found)) {
455 current_frame->len += (uint8_t *)in - aligned_start;
457 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
458 while (in < (const __m256i *)limit) {
459 __m256i data = _mm256_load_si256(in);
460 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
461 __m256i found = _mm256_cmpeq_epi8(data, needle);
462 if (!_mm256_testz_si256(found, found)) {
469 current_frame->len = (uint8_t *)out - current_frame->data;
472 const __m128i needle = _mm_set1_epi8(sync_char);
474 const __m128i *in = (const __m128i *)aligned_start;
475 if (current_frame->interleaved) {
476 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
477 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
478 if (current_frame->len % 2 == 1) {
482 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
483 while (in < (const __m128i *)limit) {
484 __m128i data1 = _mm_load_si128(in);
485 __m128i data2 = _mm_load_si128(in + 1);
486 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
487 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
488 __m128i data1_hi = _mm_srli_epi16(data1, 8);
489 __m128i data2_hi = _mm_srli_epi16(data2, 8);
490 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
491 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
492 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
493 _mm_storeu_si128(out2, hi);
494 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
495 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
496 if (!_mm_testz_si128(found1, found1) ||
497 !_mm_testz_si128(found2, found2)) {
505 current_frame->len += (uint8_t *)in - aligned_start;
507 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
508 while (in < (const __m128i *)limit) {
509 __m128i data = _mm_load_si128(in);
510 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
511 __m128i found = _mm_cmpeq_epi8(data, needle);
512 if (!_mm_testz_si128(found, found)) {
519 current_frame->len = (uint8_t *)out - current_frame->data;
523 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
525 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer and feeds their
// payload into <current_frame>, splitting the stream at every occurrence of
// <sync_pattern>.
//
// xfr:             the completed transfer; packet i's data starts at the sum
//                  of the preceding packets' pack->length in xfr->buffer.
// sync_pattern:    byte sequence that marks the start of a new frame/block.
// current_frame:   frame that data is appended to.
// frame_type_name: passed on to add_to_frame() for its messages.
// start_callback:  called with a pointer just past each sync marker found.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); the
// sync_length parameter used below is declared on a line not visible here,
// as is the handling after a packet error.
529 void decode_packs(const libusb_transfer *xfr,
530 const char *sync_pattern,
532 FrameAllocator::Frame *current_frame,
533 const char *frame_type_name,
534 function<void(const uint8_t *start)> start_callback)
537 for (int i = 0; i < xfr->num_iso_packets; i++) {
538 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
540 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
541 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
546 const uint8_t *start = xfr->buffer + offset;
547 const uint8_t *limit = start + pack->actual_length;
548 while (start < limit) { // Usually runs only one iteration.
// Let the fast path copy as much as it can; it returns where it stopped.
550 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
551 if (start == limit) break;
552 assert(start < limit);
// Slow path: look for the full sync pattern in what is left of the packet.
555 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
556 if (start_next_frame == nullptr) {
557 // add the rest of the buffer
558 add_to_frame(current_frame, frame_type_name, start, limit);
// Marker found: finish the current frame with the bytes before it, then let
// the callback start the next one.
561 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
562 start = start_next_frame + sync_length; // skip sync
563 start_callback(start);
567 dump_pack(xfr, offset, pack);
// Packets are laid out at their full (requested) length in the buffer, not
// at the length actually received.
569 offset += pack->length;
// libusb completion callback for all transfers set up by this class.
// xfr->user_data is expected to point at the owning BMUSBCapture.
// Isochronous transfers are decoded with decode_packs(): endpoint 0x84 is
// treated as audio (sync marker "DeckLinkAudioResyncT"), the other case as
// video (sync marker 00 00 ff ff). Control transfers are treated as register
// reads. At the end, the transfer is submitted again.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); some
// of the code below refers to names whose declarations are commented out or
// not visible (<setup>, <i>), so parts of it may be compiled out. Confirm
// against the original.
573 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is reported and freed.
575 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
576 fprintf(stderr, "transfer status %d\n", xfr->status);
577 libusb_free_transfer(xfr);
581 assert(xfr->user_data != nullptr);
582 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
584 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
585 if (xfr->endpoint == 0x84) {
586 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
588 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
591 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
592 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
593 uint8_t *buf = libusb_control_transfer_get_data(xfr);
595 if (setup->wIndex == 44) {
596 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
598 printf("read register %2d: 0x%02x%02x%02x%02x\n",
599 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read into register_file and advance to the next
// register, wrapping around at NUM_BMUSB_REGISTERS.
602 memcpy(usb->register_file + usb->current_register, buf, 4);
603 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
604 if (usb->current_register == 0) {
605 // read through all of them
606 printf("register dump:");
607 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
608 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the control transfer to read the next register.
612 libusb_fill_control_setup(xfr->buffer,
613 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
614 /*index=*/usb->current_register, /*length=*/4);
619 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
620 for (i = 0; i < xfr->actual_length; i++) {
621 printf("%02x", xfr->buffer[i]);
631 if (libusb_submit_transfer(xfr) < 0) {
632 fprintf(stderr, "error re-submitting URB\n");
637 void BMUSBCapture::usb_thread_func()
640 memset(¶m, 0, sizeof(param));
641 param.sched_priority = 1;
642 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
643 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
645 while (!should_quit) {
646 int rc = libusb_handle_events(nullptr);
647 if (rc != LIBUSB_SUCCESS)
652 void BMUSBCapture::configure_card()
654 if (video_frame_allocator == nullptr) {
655 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
657 if (audio_frame_allocator == nullptr) {
658 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
660 dequeue_thread_should_quit = false;
661 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
664 struct libusb_transfer *xfr;
666 rc = libusb_init(nullptr);
668 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
672 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
673 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
674 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
676 fprintf(stderr, "Error finding USB device\n");
680 libusb_config_descriptor *config;
681 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
683 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
686 printf("%d interface\n", config->bNumInterfaces);
687 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
688 printf(" interface %d\n", interface_number);
689 const libusb_interface *interface = &config->interface[interface_number];
690 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
691 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
692 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
693 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
694 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
695 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
700 rc = libusb_set_configuration(devh, /*configuration=*/1);
702 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
706 rc = libusb_claim_interface(devh, 0);
708 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
712 // Alternate setting 1 is output, alternate setting 2 is input.
713 // Card is reset when switching alternates, so the driver uses
714 // this “double switch” when it wants to reset.
716 // There are also alternate settings 3 and 4, which seem to be
717 // like 1 and 2 except that they advertise a lower bandwidth requirement.
718 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
720 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
723 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
725 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
729 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
731 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
737 rc = libusb_claim_interface(devh, 3);
739 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
745 // 44 is some kind of timer register (first 16 bits count upwards)
746 // 24 is some sort of watchdog?
747 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
748 // (or will go to 0x73c60010?), also seen 0x73c60100
749 // 12 also changes all the time, unclear why
750 // 16 seems to be autodetected mode somehow
751 // -- this is e00115e0 after reset?
752 // ed0115e0 after mode change [to output?]
753 // 2d0015e0 after more mode change [to input]
754 // ed0115e0 after more mode change
755 // 2d0015e0 after more mode change
757 // 390115e0 seems to indicate we have signal
758 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
760 // 200015e0 on startup
761 // changes to 250115e0 when we sync to the signal
763 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
765 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
767 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
768 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
770 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
771 // perhaps some of them are related to analog output?
773 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
774 // but the driver sets it to 0x8036802a at some point.
776 // all of this is on request 214/215. other requests (192, 219,
777 // 222, 223, 224) are used for firmware upgrade. Probably best to
778 // stay out of it unless you know what you're doing.
782 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
785 // 0x01 - stable signal
787 // 0x08 - unknown (audio??)
797 static const ctrl ctrls[] = {
798 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
799 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
801 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
803 // clearing the 0x08000000 bit seems to change the capture format (other source?)
804 // 0x10000000 = analog audio instead of embedded audio, it seems
805 // 0x3a000000 = component video? (analog audio)
806 // 0x3c000000 = composite video? (analog audio)
807 // 0x3e000000 = s-video? (analog audio)
808 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
809 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
810 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
811 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
812 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
815 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
816 uint32_t flipped = htonl(ctrls[req].data);
817 static uint8_t value[4];
818 memcpy(value, &flipped, sizeof(flipped));
819 int size = sizeof(value);
820 //if (ctrls[req].request == 215) size = 0;
821 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
822 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
824 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
828 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
829 for (int i = 0; i < rc; ++i) {
830 printf("%02x", value[i]);
838 static int my_index = 0;
839 static uint8_t value[4];
840 int size = sizeof(value);
841 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
842 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
844 fprintf(stderr, "Error on control\n");
847 printf("rc=%d index=%d: 0x", rc, my_index);
848 for (int i = 0; i < rc; ++i) {
849 printf("%02x", value[i]);
856 // set up an asynchronous transfer of the timer register
857 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
858 static int completed = 0;
860 xfr = libusb_alloc_transfer(0);
861 libusb_fill_control_setup(cmdbuf,
862 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
863 /*index=*/44, /*length=*/4);
864 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
865 xfr->user_data = this;
866 libusb_submit_transfer(xfr);
868 // set up an asynchronous transfer of register 24
869 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
870 static int completed2 = 0;
872 xfr = libusb_alloc_transfer(0);
873 libusb_fill_control_setup(cmdbuf2,
874 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
875 /*index=*/24, /*length=*/4);
876 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
877 xfr->user_data = this;
878 libusb_submit_transfer(xfr);
881 // set up an asynchronous transfer of the register dump
882 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
883 static int completed3 = 0;
885 xfr = libusb_alloc_transfer(0);
886 libusb_fill_control_setup(cmdbuf3,
887 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
888 /*index=*/current_register, /*length=*/4);
889 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
890 xfr->user_data = this;
891 //libusb_submit_transfer(xfr);
893 audiofp = fopen("audio.raw", "wb");
895 // set up isochronous transfers for audio and video
896 for (int e = 3; e <= 4; ++e) {
897 //int num_transfers = (e == 3) ? 6 : 6;
898 int num_transfers = 6;
899 for (int i = 0; i < num_transfers; ++i) {
900 int num_iso_pack, size;
902 // Video seems to require isochronous packets scaled with the width;
903 // seemingly six lines is about right, rounded up to the required 1kB
905 size = WIDTH * 2 * 6;
906 // Note that for 10-bit input, you'll need to increase size accordingly.
907 //size = size * 4 / 3;
908 if (size % 1024 != 0) {
912 num_iso_pack = (2 << 18) / size; // 512 kB.
913 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
918 int num_bytes = num_iso_pack * size;
919 uint8_t *buf = new uint8_t[num_bytes];
921 xfr = libusb_alloc_transfer(num_iso_pack);
923 fprintf(stderr, "oom\n");
927 int ep = LIBUSB_ENDPOINT_IN | e;
928 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
929 num_iso_pack, cb_xfr, nullptr, 0);
930 libusb_set_iso_packet_lengths(xfr, size);
931 xfr->user_data = this;
932 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer collected in iso_xfrs to libusb,
// which starts the actual capture. A failed submission is reported with the
// endpoint and the libusb error name.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); <i>
// and <devh> are used below but their declarations are not visible, and it
// is not visible under which conditions the release/exit calls at the end
// are reached. Confirm against the original.
937 void BMUSBCapture::start_bm_capture()
939 printf("starting capture\n");
941 for (libusb_transfer *xfr : iso_xfrs) {
942 printf("submitting transfer...\n");
943 int rc = libusb_submit_transfer(xfr);
946 //printf("num_bytes=%d\n", num_bytes);
947 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
948 xfr->endpoint, i, libusb_error_name(rc));
955 libusb_release_interface(devh, 0);
959 libusb_exit(nullptr);
964 void BMUSBCapture::stop_dequeue_thread()
966 dequeue_thread_should_quit = true;
967 queues_not_empty.notify_all();
968 dequeue_thread.join();
// Starts the USB thread, which runs usb_thread_func().
//
// NOTE(review): this listing has gaps (see the embedded line numbers); any
// setup done before the thread is created is not visible here.
971 void BMUSBCapture::start_bm_thread()
974 usb_thread = thread(&BMUSBCapture::usb_thread_func);
977 void BMUSBCapture::stop_bm_thread()