1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Lines per frame; within this excerpt it is only referenced by the
// commented-out FRAME_SIZE formulas below.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Number of bytes at the start of a video frame buffer that precede the
// payload: dump_frame() skips them, and the value is passed to
// frame_callback as the video offset.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Same role as HEADER_SIZE, but for audio block buffers.
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed video buffer size (8 << 20 bytes = 8 MiB), used when configure_card()
// creates the default video frame allocator.
46 #define FRAME_SIZE (8 << 20)
// Polled by the USB event loop in usb_thread_func(); the loop stops once
// this becomes true.
51 atomic<bool> should_quit;
// Out-of-line definition of FrameAllocator's destructor; it does nothing.
53 FrameAllocator::~FrameAllocator() {}
// Number of buffers each MallocFrameAllocator allocates up front in its
// constructor.
55 #define NUM_QUEUED_FRAMES 16
// FrameAllocator implementation that serves frames from a pool of heap
// buffers created in the constructor. Buffers are handed out by alloc_frame()
// and returned to the pool by release_frame().
56 class MallocFrameAllocator : public FrameAllocator {
// frame_size is the size in bytes of every buffer in the pool.
58 MallocFrameAllocator(size_t frame_size);
// Takes a buffer from the pool; prints a "Frame overrun" message instead if
// the pool is empty.
59 Frame alloc_frame() override;
// Puts the frame's buffer back into the pool.
60 void release_frame(Frame frame) override;
// Buffers currently available for alloc_frame(). Accessed under
// freelist_mutex in alloc_frame() and release_frame().
66 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70 : frame_size(frame_size)
72 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
82 unique_lock<mutex> lock(freelist_mutex); // Meh.
83 if (freelist.empty()) {
84 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
87 vf.data = freelist.top().release();
89 freelist.pop(); // Meh.
94 void MallocFrameAllocator::release_frame(Frame frame)
96 if (frame.overflow > 0) {
97 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
99 unique_lock<mutex> lock(freelist_mutex);
100 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Compares two 16-bit counters that wrap around at 0x10000.
//
// Returns true when b lies ahead of a by fewer than 0x8000 steps, counting
// forward modulo 0x10000; so 0xffff is "less than" 0x0000. Equal values are
// not less than each other.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Forward distance from a to b on the 16-bit circle.
	const uint16_t forward = uint16_t(b - a);
	return forward != 0 && forward < 0x8000;
}
115 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
117 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
118 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
119 q->back().timecode, timecode);
120 frame.owner->release_frame(frame);
126 qf.timecode = timecode;
130 unique_lock<mutex> lock(queue_lock);
131 q->push_back(move(qf));
133 queues_not_empty.notify_one(); // might be spurious
136 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
138 FILE *fp = fopen(filename, "wb");
139 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
140 printf("short write!\n");
145 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
147 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up pending video and audio frames by
// timecode and hands each matched pair to frame_callback. A frame whose
// timecode is lower than the head of the other queue is reported and released
// back to its allocator. Runs until dequeue_thread_should_quit is set.
150 void BMUSBCapture::dequeue_thread_func()
// Optional user hook, run once on this thread before the loop starts.
152 if (has_dequeue_callbacks) {
153 dequeue_init_callback();
155 while (!dequeue_thread_should_quit) {
156 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues have at least one entry, or until asked to quit.
// NOTE(review): when this wakes up because of the quit flag, either queue may
// still be empty; confirm there is an early exit before the front() calls below.
157 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
159 uint16_t video_timecode = pending_video_frames.front().timecode;
160 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// NOTE(review): plain '<' here, whereas queue_frame() orders timecodes with
// uint16_less_than_with_wraparound(); the two disagree around 0xffff -> 0x0000.
161 if (video_timecode < audio_timecode) {
162 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
164 video_frame_allocator->release_frame(pending_video_frames.front().frame);
165 pending_video_frames.pop_front();
166 } else if (audio_timecode < video_timecode) {
167 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
169 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
170 pending_audio_frames.pop_front();
// Timecodes match: copy both queue entries out and remove them from the queues.
172 QueuedFrame video_frame = pending_video_frames.front();
173 QueuedFrame audio_frame = pending_audio_frames.front();
174 pending_audio_frames.pop_front();
175 pending_video_frames.pop_front();
// Writes the video frame to "<format><timecode>.uyvy" and appends the audio to
// the audio dump file (presumably debug-only code; confirm it is compiled in).
180 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
181 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
182 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// Deliver the pair. The offsets tell the callback how many header bytes
// precede the payload in each buffer.
185 frame_callback(video_timecode,
186 video_frame.frame, HEADER_SIZE, video_frame.format,
187 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Optional user hook, run once on this thread after the loop ends.
190 if (has_dequeue_callbacks) {
191 dequeue_cleanup_callback();
// Called when a video sync marker has been found; start points at the bytes
// immediately following the marker. Queues the video frame collected so far
// (if it contains any data) and allocates a fresh buffer for the next one.
195 void BMUSBCapture::start_new_frame(const uint8_t *start)
// The four bytes at start hold two little-endian 16-bit values: timecode
// (bytes 0-1) and format (bytes 2-3).
197 uint16_t format = (start[3] << 8) | start[2];
198 uint16_t timecode = (start[1] << 8) | start[0];
// Frames with no data collected (len == 0) are not queued.
200 if (current_video_frame.len > 0) {
202 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
204 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
206 // //start[7], start[6], start[5], start[4],
207 // read_current_frame, FRAME_SIZE);
// Subsequent add_to_frame() calls fill this new frame.
209 current_video_frame = video_frame_allocator->alloc_frame();
210 //if (current_video_frame.data == nullptr) {
211 // read_current_frame = -1;
213 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called when an audio sync marker
// has been found, with start pointing just past the marker. Queues the audio
// block collected so far (if non-empty) and allocates a fresh one.
217 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Two little-endian 16-bit values: timecode (bytes 0-1) and format (bytes 2-3).
219 uint16_t format = (start[3] << 8) | start[2];
220 uint16_t timecode = (start[1] << 8) | start[0];
221 if (current_audio_frame.len > 0) {
222 //dump_audio_block();
223 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
225 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
226 // format, timecode, read_current_audio_block);
227 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//   xfr     transfer whose buffer holds the packet data
//   offset  byte offset of this packet within xfr->buffer
//   pack    descriptor of the packet; actual_length bytes are printed
231 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
233 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
234 for (unsigned j = 0; j < pack->actual_length; j++) {
235 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
236 printf("%02x", xfr->buffer[j + offset]);
// Every eighth byte gets special treatment (presumably a separator, to group
// the output; the statement itself is not visible here).
239 else if ((j % 8) == 7)
// Splits a byte stream into two planes: bytes at even positions of src go to
// dest1 and bytes at odd positions go to dest2.
//
//   dest1, dest2  output buffers, each with room for n / 2 bytes
//   src           input buffer of n bytes
//   n             number of input bytes; must be even
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	const size_t num_pairs = n / 2;
	for (size_t pair = 0; pair < num_pairs; ++pair) {
		dest1[pair] = src[2 * pair];
		dest2[pair] = src[2 * pair + 1];
	}
}
// Appends the bytes [start, end) to current_frame (the slow, general path).
//   current_frame    frame being filled; len is advanced by the bytes added
//   frame_type_name  label used in the overflow message (e.g. "video", "audio")
//   start, end       source byte range
// Bytes that do not fit are counted in current_frame->overflow instead of
// being stored.
259 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Sanity checks: no buffer, or a frame that is already past its capacity.
261 if (current_frame->data == nullptr ||
262 current_frame->len > current_frame->size ||
267 int bytes = end - start;
// Not enough room: record how many bytes would not fit and mark the frame full.
268 if (current_frame->len + bytes > current_frame->size) {
269 current_frame->overflow = current_frame->len + bytes - current_frame->size;
270 current_frame->len = current_frame->size;
// Overflows larger than 1 MiB are reported right away and the counter is reset.
271 if (current_frame->overflow > 1048576) {
272 printf("%d bytes overflow after last %s frame\n",
273 int(current_frame->overflow), frame_type_name);
274 current_frame->overflow = 0;
// Interleaved frames are stored as two planes, data and data2, each holding
// every other input byte; len counts input bytes, so each plane is written at
// offset len / 2.
278 if (current_frame->interleaved) {
279 uint8_t *data = current_frame->data + current_frame->len / 2;
280 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length and odd byte count are special-cased before the bulk
// copy (memcpy_interleaved works on byte pairs; details not visible here).
281 if (current_frame->len % 2 == 1) {
285 if (bytes % 2 == 1) {
288 ++current_frame->len;
291 memcpy_interleaved(data, data2, start, bytes);
292 current_frame->len += bytes;
// Non-interleaved frames are a straight append.
294 memcpy(current_frame->data + current_frame->len, start, bytes);
295 current_frame->len += bytes;
// Debug helper: prints name (left-justified, padded to 10 characters and
// followed by ':') and then the 32 bytes of n as two-digit hex values,
// byte 0 first. Each byte needs its own call because the index passed to
// _mm256_extract_epi8 is a literal constant.
303 void avx2_dump(const char *name, __m256i n)
305 printf("%-10s:", name);
// Bytes 0-7.
306 printf(" %02x", _mm256_extract_epi8(n, 0));
307 printf(" %02x", _mm256_extract_epi8(n, 1));
308 printf(" %02x", _mm256_extract_epi8(n, 2));
309 printf(" %02x", _mm256_extract_epi8(n, 3));
310 printf(" %02x", _mm256_extract_epi8(n, 4));
311 printf(" %02x", _mm256_extract_epi8(n, 5));
312 printf(" %02x", _mm256_extract_epi8(n, 6));
313 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
315 printf(" %02x", _mm256_extract_epi8(n, 8));
316 printf(" %02x", _mm256_extract_epi8(n, 9));
317 printf(" %02x", _mm256_extract_epi8(n, 10));
318 printf(" %02x", _mm256_extract_epi8(n, 11));
319 printf(" %02x", _mm256_extract_epi8(n, 12));
320 printf(" %02x", _mm256_extract_epi8(n, 13));
321 printf(" %02x", _mm256_extract_epi8(n, 14));
322 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
324 printf(" %02x", _mm256_extract_epi8(n, 16));
325 printf(" %02x", _mm256_extract_epi8(n, 17));
326 printf(" %02x", _mm256_extract_epi8(n, 18));
327 printf(" %02x", _mm256_extract_epi8(n, 19));
328 printf(" %02x", _mm256_extract_epi8(n, 20));
329 printf(" %02x", _mm256_extract_epi8(n, 21));
330 printf(" %02x", _mm256_extract_epi8(n, 22));
331 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
333 printf(" %02x", _mm256_extract_epi8(n, 24));
334 printf(" %02x", _mm256_extract_epi8(n, 25));
335 printf(" %02x", _mm256_extract_epi8(n, 26));
336 printf(" %02x", _mm256_extract_epi8(n, 27));
337 printf(" %02x", _mm256_extract_epi8(n, 28));
338 printf(" %02x", _mm256_extract_epi8(n, 29));
339 printf(" %02x", _mm256_extract_epi8(n, 30));
340 printf(" %02x", _mm256_extract_epi8(n, 31));
345 // Does a memcpy and memchr in one to reduce processing time.
346 // Note that the benefit is somewhat limited if your L3 cache is small,
347 // as you'll (unfortunately) spend most of the time loading the data
350 // Complicated cases are left to the slow path; the fast path only copies
351 // up to the first instance of "sync_char" (usually stopping a bit before it, actually).
352 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
353 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// SIMD fast path for appending [start, limit) to current_frame while looking
// for sync_char at the same time.
//   current_frame  frame being filled (plain or interleaved layout)
//   start, limit   source byte range
//   sync_char      first byte of the sync marker; copying stops at the first
//                  vector-sized chunk that contains it
// Returns a pointer into the source range marking how far the SIMD loops got
// (the final value of the loop pointer); the caller handles the rest on the
// slow path. The early-exit branches are not visible here; presumably they
// return start unchanged so the caller falls back to the slow path.
// The code contains one variant using 256-bit (AVX2) operations and one using
// 128-bit (SSE) operations; presumably the choice is made at compile time.
354 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same sanity checks as add_to_frame().
356 if (current_frame->data == nullptr ||
357 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes are special-cased (presumably not worth the setup cost).
361 size_t orig_bytes = limit - start;
362 if (orig_bytes < 128) {
367 // Don't read more bytes than we can write.
368 limit = min(limit, start + (current_frame->size - current_frame->len));
370 // Align end to 32 bytes.
371 limit = (const uint8_t *)(intptr_t(limit) & ~31);
373 if (start >= limit) {
377 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
378 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
379 if (aligned_start != start) {
// The unaligned head goes through the slow path; if it already contains
// sync_char, only the bytes before it are added.
380 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
381 if (sync_start == nullptr) {
382 add_to_frame(current_frame, "", start, aligned_start);
384 add_to_frame(current_frame, "", start, sync_start);
// The interleaved loops consume two vectors per iteration, so the remaining
// length has to be a multiple of that.
389 // Make the length a multiple of 64.
390 if (current_frame->interleaved) {
391 if (((limit - aligned_start) % 64) != 0) {
394 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit (AVX2) variant ----
398 const __m256i needle = _mm256_set1_epi8(sync_char);
400 const __restrict __m256i *in = (const __m256i *)aligned_start;
401 if (current_frame->interleaved) {
// out1 receives the even-position input bytes and out2 the odd-position ones;
// the (len + 1) / 2 vs. len / 2 offsets account for an odd current length.
402 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
403 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
404 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-position bytes to the low half, odd-position
// bytes to the high half.
408 __m256i shuffle_cw = _mm256_set_epi8(
409 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
410 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
411 while (in < (const __m256i *)limit) {
412 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
413 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
414 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Nonzero wherever either input vector contains sync_char.
416 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
417 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
418 __m256i found = _mm256_or_si256(found1, found2);
420 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
421 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
423 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
424 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo gathers the low 128-bit halves of data1 and data2 (all even-position
// bytes), hi the high halves (all odd-position bytes).
426 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
427 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
429 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
430 _mm256_storeu_si256(out2, hi);
// Leave the loop handling as soon as sync_char was seen in this chunk.
432 if (!_mm256_testz_si256(found, found)) {
// Account for the input bytes consumed by the loop.
440 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: plain 32-byte copy with the same sync_char check.
442 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
443 while (in < (const __m256i *)limit) {
444 __m256i data = _mm256_load_si256(in);
445 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
446 __m256i found = _mm256_cmpeq_epi8(data, needle);
447 if (!_mm256_testz_si256(found, found)) {
454 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit (SSE) variant, same structure as above ----
457 const __m128i needle = _mm_set1_epi8(sync_char);
459 const __m128i *in = (const __m128i *)aligned_start;
460 if (current_frame->interleaved) {
461 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
462 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
463 if (current_frame->len % 2 == 1) {
// Deinterleave by treating the input as 16-bit words: the low byte of each
// word (even positions) goes to out1, the high byte (odd positions) to out2.
467 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
468 while (in < (const __m128i *)limit) {
469 __m128i data1 = _mm_load_si128(in);
470 __m128i data2 = _mm_load_si128(in + 1);
471 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
472 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
473 __m128i data1_hi = _mm_srli_epi16(data1, 8);
474 __m128i data2_hi = _mm_srli_epi16(data2, 8);
475 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
476 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
477 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
478 _mm_storeu_si128(out2, hi);
479 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
480 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
481 if (!_mm_testz_si128(found1, found1) ||
482 !_mm_testz_si128(found2, found2)) {
490 current_frame->len += (uint8_t *)in - aligned_start;
492 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
493 while (in < (const __m128i *)limit) {
494 __m128i data = _mm_load_si128(in);
495 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
496 __m128i found = _mm_cmpeq_epi8(data, needle);
497 if (!_mm_testz_si128(found, found)) {
504 current_frame->len = (uint8_t *)out - current_frame->data;
508 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Tell the caller where the fast path stopped.
510 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer, appends their
// payload to current_frame and splits the stream at every sync marker.
//   xfr              the completed transfer
//   sync_pattern     byte sequence marking the start of a new frame/block
//                    (its length is passed separately as sync_length)
//   current_frame    frame currently being filled
//   frame_type_name  label for diagnostics ("video" or "audio")
//   start_callback   invoked with a pointer to the bytes just after each
//                    sync marker found
514 void decode_packs(const libusb_transfer *xfr,
515 const char *sync_pattern,
517 FrameAllocator::Frame *current_frame,
518 const char *frame_type_name,
519 function<void(const uint8_t *start)> start_callback)
522 for (int i = 0; i < xfr->num_iso_packets; i++) {
523 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete successfully are reported.
525 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
526 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of the packet's slot in the buffer are valid.
531 const uint8_t *start = xfr->buffer + offset;
532 const uint8_t *limit = start + pack->actual_length;
533 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path stops near the first
// occurrence of the marker's first byte.
535 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
536 if (start == limit) break;
537 assert(start < limit);
// Slow path: search the remainder for the complete sync marker.
540 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
541 if (start_next_frame == nullptr) {
542 // add the rest of the buffer
543 add_to_frame(current_frame, frame_type_name, start, limit);
// Marker found: finish the current frame just before it and start a new one.
546 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
547 start = start_next_frame + sync_length; // skip sync
// NOTE(review): the callbacks bound in cb_xfr read start[0..3]; if the marker
// sits within the last few bytes of the packet, that read goes past limit.
548 start_callback(start);
552 dump_pack(xfr, offset, pack);
// Packets occupy fixed-size slots of pack->length bytes in the buffer,
// regardless of how much was actually received.
554 offset += pack->length;
// libusb completion callback, shared by the isochronous (audio/video) and
// control (register read) transfers. Decodes the received data and then
// resubmits the same transfer.
558 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A failed transfer is reported and freed.
560 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
561 fprintf(stderr, "transfer status %d\n", xfr->status);
562 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture instance (set in configure_card()).
566 assert(xfr->user_data != nullptr);
567 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
569 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio, delimited by the ASCII marker below; the other
// branch handles video, delimited by 00 00 ff ff.
570 if (xfr->endpoint == 0x84) {
571 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
573 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
576 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
577 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
578 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): 'setup' is only declared in the commented-out line above, so
// the prints below are presumably compiled out; confirm.
580 if (setup->wIndex == 44) {
581 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
583 printf("read register %2d: 0x%02x%02x%02x%02x\n",
584 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// around after NUM_BMUSB_REGISTERS.
587 memcpy(usb->register_file + usb->current_register, buf, 4);
588 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
589 if (usb->current_register == 0) {
590 // read through all of them
591 printf("register dump:");
592 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
593 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the control transfer to read the next register.
597 libusb_fill_control_setup(xfr->buffer,
598 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
599 /*index=*/usb->current_register, /*length=*/4);
// Hex dump of the raw transfer buffer.
604 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
605 for (i = 0; i < xfr->actual_length; i++) {
606 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so data keeps flowing.
616 if (libusb_submit_transfer(xfr) < 0) {
617 fprintf(stderr, "error re-submitting URB\n");
622 void BMUSBCapture::usb_thread_func()
625 memset(¶m, 0, sizeof(param));
626 param.sched_priority = 1;
627 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
628 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
630 while (!should_quit) {
631 int rc = libusb_handle_events(nullptr);
632 if (rc != LIBUSB_SUCCESS)
// One-time setup of the capture card: installs default frame allocators if
// none were set, starts the dequeue thread, initializes libusb, opens and
// configures the device, programs/reads its registers over vendor control
// requests, and prepares (but does not submit) the isochronous transfers for
// audio and video. The transfers are submitted later by start_bm_capture().
637 void BMUSBCapture::configure_card()
// Fall back to malloc-backed pools when the user supplied no allocators.
639 if (video_frame_allocator == nullptr) {
640 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
642 if (audio_frame_allocator == nullptr) {
643 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
// Start the thread that pairs up and delivers queued frames.
645 dequeue_thread_should_quit = false;
646 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
649 struct libusb_transfer *xfr;
651 rc = libusb_init(nullptr);
653 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
657 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
658 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
659 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
661 fprintf(stderr, "Error finding USB device\n");
// Print the device's interfaces, alternate settings and endpoints (informational only).
665 libusb_config_descriptor *config;
666 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
668 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
671 printf("%d interface\n", config->bNumInterfaces);
672 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
673 printf(" interface %d\n", interface_number);
674 const libusb_interface *interface = &config->interface[interface_number];
675 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
676 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
677 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
678 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
679 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
680 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
685 rc = libusb_set_configuration(devh, /*configuration=*/1);
687 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
691 rc = libusb_claim_interface(devh, 0);
693 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
697 // Alternate setting 1 is output, alternate setting 2 is input.
698 // Card is reset when switching alternates, so the driver uses
699 // this “double switch” when it wants to reset.
701 // There's also alternate settings 3 and 4, which seem to be
702 // like 1 and 2 except they advertise less bandwidth needed.
703 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
705 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
708 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
// NOTE(review): the call above selects alternate setting 2, but the message
// below still says "alternate 1" (copy/paste slip in the error text).
710 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// NOTE(review): this switches back to alternate setting 1 (output, per the
// comment above); presumably this call is compiled out. Confirm.
714 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
716 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
722 rc = libusb_claim_interface(devh, 3);
724 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
730 // 44 is some kind of timer register (first 16 bits count upwards)
731 // 24 is some sort of watchdog?
732 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
733 // (or will go to 0x73c60010?), also seen 0x73c60100
734 // 12 also changes all the time, unclear why
735 // 16 seems to be autodetected mode somehow
736 // -- this is e00115e0 after reset?
737 // ed0115e0 after mode change [to output?]
738 // 2d0015e0 after more mode change [to input]
739 // ed0115e0 after more mode change
740 // 2d0015e0 after more mode change
742 // 390115e0 seems to indicate we have signal
743 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
745 // 200015e0 on startup
746 // changes to 250115e0 when we sync to the signal
748 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
750 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
752 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
753 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
755 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
756 // perhaps some of them are related to analog output?
758 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
759 // but the driver sets it to 0x8036802a at some point.
761 // all of this is on request 214/215. other requests (192, 219,
762 // 222, 223, 224) are used for firmware upgrade. Probably best to
763 // stay out of it unless you know what you're doing.
767 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
770 // 0x01 - stable signal
772 // 0x08 - unknown (audio??)
// Control requests issued in order at startup. Each entry is
// { direction, request, register index, data }: request 214 is used with
// LIBUSB_ENDPOINT_IN (read) and 215 with LIBUSB_ENDPOINT_OUT (write).
782 static const ctrl ctrls[] = {
783 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
784 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
786 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
788 // clearing the 0x08000000 bit seems to change the capture format (other source?)
789 // 0x10000000 = analog audio instead of embedded audio, it seems
790 // 0x3a000000 = component video? (analog audio)
791 // 0x3c000000 = composite video? (analog audio)
792 // 0x3e000000 = s-video? (analog audio)
793 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
794 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
795 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
796 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
797 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
800 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is sent most-significant byte first (htonl).
801 uint32_t flipped = htonl(ctrls[req].data);
802 static uint8_t value[4];
803 memcpy(value, &flipped, sizeof(flipped));
804 int size = sizeof(value);
805 //if (ctrls[req].request == 215) size = 0;
// Synchronous transfer; timeout 0 means wait without a time limit.
806 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
807 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
809 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// On success rc is the number of bytes transferred; print them as hex.
813 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
814 for (int i = 0; i < rc; ++i) {
815 printf("%02x", value[i]);
// Synchronous read of a single register (index my_index), printed as hex.
823 static int my_index = 0;
824 static uint8_t value[4];
825 int size = sizeof(value);
826 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
827 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
829 fprintf(stderr, "Error on control\n");
832 printf("rc=%d index=%d: 0x", rc, my_index);
833 for (int i = 0; i < rc; ++i) {
834 printf("%02x", value[i]);
// The command buffers below are static because libusb keeps using them after
// this function returns (cb_xfr resubmits the transfers).
841 // set up an asynchronous transfer of the timer register
842 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
843 static int completed = 0;
845 xfr = libusb_alloc_transfer(0);
846 libusb_fill_control_setup(cmdbuf,
847 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
848 /*index=*/44, /*length=*/4);
849 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Overrides the user_data passed to the fill call above; cb_xfr expects the
// BMUSBCapture instance here.
850 xfr->user_data = this;
851 libusb_submit_transfer(xfr);
853 // set up an asynchronous transfer of register 24
854 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
855 static int completed2 = 0;
857 xfr = libusb_alloc_transfer(0);
858 libusb_fill_control_setup(cmdbuf2,
859 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
860 /*index=*/24, /*length=*/4);
861 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
862 xfr->user_data = this;
863 libusb_submit_transfer(xfr);
866 // set up an asynchronous transfer of the register dump
867 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
868 static int completed3 = 0;
870 xfr = libusb_alloc_transfer(0);
871 libusb_fill_control_setup(cmdbuf3,
872 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
873 /*index=*/current_register, /*length=*/4);
874 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
875 xfr->user_data = this;
// The register-dump transfer is prepared but intentionally not submitted.
876 //libusb_submit_transfer(xfr);
// NOTE(review): the fopen() result is not checked here; dump_audio_block()
// writes to audiofp.
878 audiofp = fopen("audio.raw", "wb");
880 // set up isochronous transfers for audio and video
// e is the endpoint number: 4 is audio (cb_xfr tests for 0x84), 3 is video.
881 for (int e = 3; e <= 4; ++e) {
882 //int num_transfers = (e == 3) ? 6 : 6;
883 int num_transfers = 6;
884 for (int i = 0; i < num_transfers; ++i) {
885 int num_iso_pack, size;
887 // Video seems to require isochronous packets scaled with the width;
888 // seemingly six lines is about right, rounded up to the required 1kB
890 size = WIDTH * 2 * 6;
891 // Note that for 10-bit input, you'll need to increase size accordingly.
892 //size = size * 4 / 3;
893 if (size % 1024 != 0) {
897 num_iso_pack = (2 << 18) / size; // 512 kB.
898 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer holds all packets of a transfer.
// NOTE(review): buf is not freed on the "oom" path below.
903 int num_bytes = num_iso_pack * size;
904 uint8_t *buf = new uint8_t[num_bytes];
906 xfr = libusb_alloc_transfer(num_iso_pack);
908 fprintf(stderr, "oom\n");
912 int ep = LIBUSB_ENDPOINT_IN | e;
913 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
914 num_iso_pack, cb_xfr, nullptr, 0);
915 libusb_set_iso_packet_lengths(xfr, size);
916 xfr->user_data = this;
// Remembered for submission in start_bm_capture().
917 iso_xfrs.push_back(xfr);
// Starts streaming by submitting every isochronous transfer that
// configure_card() prepared in iso_xfrs.
922 void BMUSBCapture::start_bm_capture()
924 printf("starting capture\n");
926 for (libusb_transfer *xfr : iso_xfrs) {
927 printf("submitting transfer...\n");
928 int rc = libusb_submit_transfer(xfr);
931 //printf("num_bytes=%d\n", num_bytes);
// Reports which endpoint and which transfer (by running index i) failed.
932 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
933 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): no devh is declared in the visible part of this function, so
// this teardown is presumably compiled out; confirm.
940 libusb_release_interface(devh, 0);
944 libusb_exit(nullptr);
949 void BMUSBCapture::stop_dequeue_thread()
951 dequeue_thread_should_quit = true;
952 queues_not_empty.notify_all();
953 dequeue_thread.join();
// Starts the USB event thread, which runs usb_thread_func().
956 void BMUSBCapture::start_bm_thread()
// No object is passed along with the function pointer, so usb_thread_func is
// invoked without an instance.
959 usb_thread = thread(&BMUSBCapture::usb_thread_func);
962 void BMUSBCapture::stop_bm_thread()