1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Capture geometry and buffer-size constants. The commented-out alternatives
// are kept for reference.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Bytes skipped at the start of a video frame buffer before the payload
// (see dump_frame() and the frame_callback() call in dequeue_thread_func()).
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Bytes skipped at the start of an audio block before the payload
// (see dump_audio_block()).
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed 8 MiB buffer size, used instead of the per-format formulas above;
// passed to the default video allocator in configure_card().
46 #define FRAME_SIZE (8 << 20)
// Stop flag polled by the event loop in BMUSBCapture::usb_thread_func().
51 atomic<bool> should_quit;
// Out-of-line definition of the (empty) FrameAllocator destructor.
53 FrameAllocator::~FrameAllocator() {}
// Number of buffers each MallocFrameAllocator allocates up front in its
// constructor.
55 #define NUM_QUEUED_FRAMES 16
// FrameAllocator implementation that hands out buffers from a pool of heap
// allocations kept on a free list.
56 class MallocFrameAllocator : public FrameAllocator {
// frame_size: size in bytes of every buffer in the pool.
58 MallocFrameAllocator(size_t frame_size);
59 Frame alloc_frame() override;
60 void release_frame(Frame frame) override;
// Pool of currently unused buffers; alloc_frame() and release_frame() take
// freelist_mutex before touching it.
66 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Remembers the per-buffer size and fills the free list with
// NUM_QUEUED_FRAMES buffers of frame_size bytes each.
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70 : frame_size(frame_size)
72 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Takes one buffer off the free list (under freelist_mutex) and returns it
// wrapped in a Frame. When the list is empty a "Frame overrun" message is
// printed instead.
// NOTE(review): what is returned in the overrun case is not visible in this
// view -- presumably a Frame with null data; confirm against the full source.
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
82 unique_lock<mutex> lock(freelist_mutex); // Meh.
83 if (freelist.empty()) {
84 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership leaves the unique_ptr here and travels in the Frame as a raw
// pointer; release_frame() wraps it in a unique_ptr again.
87 vf.data = freelist.top().release();
// Drops the now-empty unique_ptr left on top of the stack.
89 freelist.pop(); // Meh.
// Returns a frame's buffer to the free list (under freelist_mutex),
// re-wrapping the raw frame.data pointer in a unique_ptr.
94 void MallocFrameAllocator::release_frame(Frame frame)
96 unique_lock<mutex> lock(freelist_mutex);
97 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around; used by queue_frame()
// to check that timecodes move forward.
// Both visible results are "b is ahead of a by less than 0x8000"; the second
// one first lifts b by 0x10000, i.e. treats b as having wrapped.
// NOTE(review): the conditions that select between the two returns (and any
// equal-values case) are not visible in this view -- confirm.
100 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
105 return (b - a < 0x8000);
107 int wrap_b = 0x10000 + int(b);
108 return (wrap_b - a < 0x8000);
// Appends a finished frame to *q together with its format and timecode, then
// wakes the dequeue thread.
// If the queue already has entries and the new timecode does not follow the
// last queued one (per uint16_less_than_with_wraparound()), a "Blocks going
// backwards" message is printed and the frame is handed back to its allocator.
112 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
// NOTE(review): q->empty()/q->back() are read here before queue_lock is taken
// further down, while dequeue_thread_func() pops from the same deques under
// that lock -- confirm this is safe.
114 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
115 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
116 q->back().timecode, timecode);
117 frame.owner->release_frame(frame);
123 qf.timecode = timecode;
127 unique_lock<mutex> lock(queue_lock);
128 q->push_back(move(qf));
130 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes one frame, minus its first HEADER_SIZE bytes, to a new
// binary file called filename. Prints "short write!" if fwrite does not
// report the single item as written.
// NOTE(review): the fopen() result is passed to fwrite() on the very next
// line without a null check, and frame_len - HEADER_SIZE wraps around if
// frame_len is smaller than HEADER_SIZE.
133 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
135 FILE *fp = fopen(filename, "wb");
136 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
137 printf("short write!\n");
// Debug helper: writes one audio block, minus its first AUDIO_HEADER_SIZE
// bytes, to the audiofp stream (opened in configure_card()). The fwrite()
// result is not checked.
142 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
144 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up queued video and audio frames by
// timecode and hands each matching pair to frame_callback.
// The optional dequeue_init_callback / dequeue_cleanup_callback calls are
// made only when has_dequeue_callbacks is set.
147 void BMUSBCapture::dequeue_thread_func()
149 if (has_dequeue_callbacks) {
150 dequeue_init_callback();
152 while (!dequeue_thread_should_quit) {
153 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues have at least one entry, or until a quit has been
// requested.
154 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
156 uint16_t video_timecode = pending_video_frames.front().timecode;
157 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Whichever side has the lower timecode has no partner; release it and pop it.
// NOTE(review): this is a plain `<` on 16-bit timecodes, whereas queue_frame()
// orders them with uint16_less_than_with_wraparound() -- confirm behavior
// when the counter wraps.
158 if (video_timecode < audio_timecode) {
159 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
161 video_frame_allocator->release_frame(pending_video_frames.front().frame);
162 pending_video_frames.pop_front();
163 } else if (audio_timecode < video_timecode) {
164 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
166 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
167 pending_audio_frames.pop_front();
// Matching timecodes: copy both entries out and remove them from the queues.
169 QueuedFrame video_frame = pending_video_frames.front();
170 QueuedFrame audio_frame = pending_audio_frames.front();
171 pending_audio_frames.pop_front();
172 pending_video_frames.pop_front();
// Dump of the pair to disk. NOTE(review): the lines around this section are
// not visible here; it may be compiled out -- confirm.
177 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
178 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
179 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The header sizes are passed along as the payload offsets for each frame.
182 frame_callback(video_timecode,
183 video_frame.frame, HEADER_SIZE, video_frame.format,
184 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
187 if (has_dequeue_callbacks) {
188 dequeue_cleanup_callback();
// Called by decode_packs() with a pointer just past a video sync marker.
// Reads two little-endian 16-bit fields at start (timecode in bytes 0-1,
// format in bytes 2-3), queues the frame collected so far if it holds any
// data, and fetches a fresh buffer from the video allocator.
192 void BMUSBCapture::start_new_frame(const uint8_t *start)
194 uint16_t format = (start[3] << 8) | start[2];
195 uint16_t timecode = (start[1] << 8) | start[0];
197 if (current_video_frame.len > 0) {
// The values just read at `start` are attached to the frame that was being
// filled until now.
199 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
201 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
203 // //start[7], start[6], start[5], start[4],
204 // read_current_frame, FRAME_SIZE);
206 current_video_frame = video_frame_allocator->alloc_frame();
207 //if (current_video_frame.data == nullptr) {
208 // read_current_frame = -1;
210 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called by decode_packs() with a
// pointer just past an audio sync marker. Reads the little-endian timecode
// (bytes 0-1) and format (bytes 2-3), queues the block collected so far if it
// holds any data, and fetches a fresh buffer from the audio allocator.
214 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
216 uint16_t format = (start[3] << 8) | start[2];
217 uint16_t timecode = (start[1] << 8) | start[0];
218 if (current_audio_frame.len > 0) {
219 //dump_audio_block();
// The values just read at `start` are attached to the block that was being
// filled until now.
220 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
222 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
223 // format, timecode, read_current_audio_block);
224 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as
// two-digit hex. The packet's data is read from xfr->buffer starting at
// `offset`, for pack->actual_length bytes.
228 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
230 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
231 for (unsigned j = 0; j < pack->actual_length; j++) {
232 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
233 printf("%02x", xfr->buffer[j + offset]);
// Separator handling after every 8th byte; the other branches are not visible
// in this view.
236 else if ((j % 8) == 7)
// Copies n bytes from src into two destination buffers, consuming the source
// two bytes per loop iteration.
// NOTE(review): the loop body is not visible in this view; presumably
// alternate source bytes go to dest1 and dest2 respectively -- confirm.
244 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
247 uint8_t *dptr1 = dest1;
248 uint8_t *dptr2 = dest2;
250 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to current_frame and advances its len.
//   current_frame:   frame being filled.
//   frame_type_name: label used in the overflow message ("audio"/"video" from
//                    the callers in cb_xfr()).
// For interleaved frames the bytes are split between data and data2 via
// memcpy_interleaved(); otherwise they are copied contiguously to data + len.
256 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard for a missing buffer or an already over-full frame.
// NOTE(review): the rest of the condition and the action taken are not
// visible in this view.
258 if (current_frame->data == nullptr ||
259 current_frame->len > current_frame->size ||
264 int bytes = end - start;
// The new data would run past the end of the buffer; report by how much.
265 if (current_frame->len + bytes > current_frame->size) {
266 printf("%d bytes overflow after last %s frame\n",
267 int(current_frame->len + bytes - current_frame->size), frame_type_name);
270 if (current_frame->interleaved) {
// Each destination buffer receives half of the stream, so the write position
// in each is len / 2.
271 uint8_t *data = current_frame->data + current_frame->len / 2;
272 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Special cases for an odd current length and an odd byte count; their bodies
// are only partly visible in this view.
273 if (current_frame->len % 2 == 1) {
277 if (bytes % 2 == 1) {
280 ++current_frame->len;
283 memcpy_interleaved(data, data2, start, bytes);
284 current_frame->len += bytes;
286 memcpy(current_frame->data + current_frame->len, start, bytes);
287 current_frame->len += bytes;
// Debug helper: prints `name` (left-justified, padded to 10 characters) and a
// colon, followed by the 32 bytes of n as two-digit hex, byte 0 first.
// The byte index is written out as a literal on every line because the
// extract intrinsic takes it as an immediate operand.
295 void avx2_dump(const char *name, __m256i n)
297 printf("%-10s:", name);
// Bytes 0-7.
298 printf(" %02x", _mm256_extract_epi8(n, 0));
299 printf(" %02x", _mm256_extract_epi8(n, 1));
300 printf(" %02x", _mm256_extract_epi8(n, 2));
301 printf(" %02x", _mm256_extract_epi8(n, 3));
302 printf(" %02x", _mm256_extract_epi8(n, 4));
303 printf(" %02x", _mm256_extract_epi8(n, 5));
304 printf(" %02x", _mm256_extract_epi8(n, 6));
305 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
307 printf(" %02x", _mm256_extract_epi8(n, 8));
308 printf(" %02x", _mm256_extract_epi8(n, 9));
309 printf(" %02x", _mm256_extract_epi8(n, 10));
310 printf(" %02x", _mm256_extract_epi8(n, 11));
311 printf(" %02x", _mm256_extract_epi8(n, 12));
312 printf(" %02x", _mm256_extract_epi8(n, 13));
313 printf(" %02x", _mm256_extract_epi8(n, 14));
314 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
316 printf(" %02x", _mm256_extract_epi8(n, 16));
317 printf(" %02x", _mm256_extract_epi8(n, 17));
318 printf(" %02x", _mm256_extract_epi8(n, 18));
319 printf(" %02x", _mm256_extract_epi8(n, 19));
320 printf(" %02x", _mm256_extract_epi8(n, 20));
321 printf(" %02x", _mm256_extract_epi8(n, 21));
322 printf(" %02x", _mm256_extract_epi8(n, 22));
323 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
325 printf(" %02x", _mm256_extract_epi8(n, 24));
326 printf(" %02x", _mm256_extract_epi8(n, 25));
327 printf(" %02x", _mm256_extract_epi8(n, 26));
328 printf(" %02x", _mm256_extract_epi8(n, 27));
329 printf(" %02x", _mm256_extract_epi8(n, 28));
330 printf(" %02x", _mm256_extract_epi8(n, 29));
331 printf(" %02x", _mm256_extract_epi8(n, 30));
332 printf(" %02x", _mm256_extract_epi8(n, 31));
337 // Does a memcpy and memchr in one to reduce processing time.
338 // Note that the benefit is somewhat limited if your L3 cache is small,
339 // as you'll (unfortunately) spend most of the time loading the data
342 // Complicated cases are left to the slow path; it basically stops copying
343 // up until the first instance of "sync_char" (usually a bit before, actually).
344 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
345 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
//   current_frame: frame being filled; its len is advanced by what was copied.
//   start, limit:  source range to consume.
//   sync_char:     byte value whose presence ends the fast path
//                  (decode_packs() passes the first byte of the sync pattern).
// Returns a pointer into the source marking where the fast path stopped; the
// caller (decode_packs()) handles the bytes from there on.
// Two variants of the copy loop are visible below, one on 256-bit and one on
// 128-bit vectors. NOTE(review): the preprocessor lines choosing between them
// are not visible in this view.
346 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same guard as in add_to_frame(); the action taken is not visible here.
348 if (current_frame->data == nullptr ||
349 current_frame->len > current_frame->size ||
353 size_t orig_bytes = limit - start;
// Short inputs are not handled by the vector loops.
354 if (orig_bytes < 128) {
359 // Don't read more bytes than we can write.
360 limit = min(limit, start + (current_frame->size - current_frame->len));
362 // Align end to 32 bytes.
363 limit = (const uint8_t *)(intptr_t(limit) & ~31);
365 if (start >= limit) {
369 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
370 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
371 if (aligned_start != start) {
// The unaligned head goes through the regular add_to_frame(), stopping at the
// sync byte if one occurs inside it.
372 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
373 if (sync_start == nullptr) {
374 add_to_frame(current_frame, "", start, aligned_start);
376 add_to_frame(current_frame, "", start, sync_start);
381 // Make the length a multiple of 64.
// (The interleaved loops below consume two vectors per iteration.)
382 if (current_frame->interleaved) {
383 if (((limit - aligned_start) % 64) != 0) {
386 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit variant ----
390 const __m256i needle = _mm256_set1_epi8(sync_char);
392 const __restrict __m256i *in = (const __m256i *)aligned_start;
393 if (current_frame->interleaved) {
394 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
395 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
396 if (current_frame->len % 2 == 1) {
// Byte shuffle applied within each 128-bit lane: even-indexed bytes are
// gathered into the low half and odd-indexed bytes into the high half.
400 __m256i shuffle_cw = _mm256_set_epi8(
401 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
402 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
403 while (in < (const __m256i *)limit) {
404 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
405 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
406 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Non-zero `found` means the sync byte occurs somewhere in these 64 bytes.
408 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
409 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
410 __m256i found = _mm256_or_si256(found1, found2);
412 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
413 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
415 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
416 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo collects the low 128-bit halves of data1/data2, hi the high halves.
418 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
419 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
421 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
422 _mm256_storeu_si256(out2, hi);
424 if (!_mm256_testz_si256(found, found)) {
// Account for everything the loop consumed from the source.
432 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved case: straight copy, one vector per iteration.
434 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
435 while (in < (const __m256i *)limit) {
436 __m256i data = _mm256_load_si256(in);
437 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
438 __m256i found = _mm256_cmpeq_epi8(data, needle);
439 if (!_mm256_testz_si256(found, found)) {
// The new length is derived from how far the output pointer advanced.
446 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit variant ----
449 const __m128i needle = _mm_set1_epi8(sync_char);
451 const __m128i *in = (const __m128i *)aligned_start;
452 if (current_frame->interleaved) {
453 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
454 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
455 if (current_frame->len % 2 == 1) {
459 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
460 while (in < (const __m128i *)limit) {
461 __m128i data1 = _mm_load_si128(in);
462 __m128i data2 = _mm_load_si128(in + 1);
// Split each 16-bit pair into its low byte (masked) and high byte (shifted
// down), then pack the two vectors' worth of each into one output vector.
463 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
464 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
465 __m128i data1_hi = _mm_srli_epi16(data1, 8);
466 __m128i data2_hi = _mm_srli_epi16(data2, 8);
467 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
468 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
469 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
470 _mm_storeu_si128(out2, hi);
471 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
472 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
473 if (!_mm_testz_si128(found1, found1) ||
474 !_mm_testz_si128(found2, found2)) {
482 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved case: straight copy, one vector per iteration.
484 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
485 while (in < (const __m128i *)limit) {
486 __m128i data = _mm_load_si128(in);
487 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
488 __m128i found = _mm_cmpeq_epi8(data, needle);
489 if (!_mm_testz_si128(found, found)) {
496 current_frame->len = (uint8_t *)out - current_frame->data;
500 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Hand the stop position back to the caller.
502 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload
// to current_frame, and invokes start_callback each time the sync pattern is
// found.
//   xfr:             completed isochronous transfer.
//   sync_pattern:    byte sequence that marks the start of a new frame/block.
//   current_frame:   frame currently being filled.
//   frame_type_name: label forwarded to add_to_frame() for its messages.
//   start_callback:  called with a pointer just past each sync pattern found.
// NOTE(review): one parameter line (the sync_length used below) is not
// visible in this view.
506 void decode_packs(const libusb_transfer *xfr,
507 const char *sync_pattern,
509 FrameAllocator::Frame *current_frame,
510 const char *frame_type_name,
511 function<void(const uint8_t *start)> start_callback)
514 for (int i = 0; i < xfr->num_iso_packets; i++) {
515 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete are reported on stderr; what happens to them
// afterwards is not visible in this view.
517 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
518 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
523 const uint8_t *start = xfr->buffer + offset;
524 const uint8_t *limit = start + pack->actual_length;
525 while (start < limit) { // Usually runs only one iteration.
// Let the fast path copy as much as it can; it returns where it stopped.
527 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
528 if (start == limit) break;
529 assert(start < limit);
// Slow path: search the remainder for the full sync pattern.
532 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
533 if (start_next_frame == nullptr) {
534 // add the rest of the buffer
535 add_to_frame(current_frame, frame_type_name, start, limit);
// Pattern found: finish the current frame with the bytes before it, then
// continue right after the pattern.
538 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
539 start = start_next_frame + sync_length; // skip sync
540 start_callback(start);
544 dump_pack(xfr, offset, pack);
// Packets sit pack->length bytes apart in the buffer, regardless of how many
// bytes were actually received.
546 offset += pack->length;
// libusb completion callback shared by the isochronous and control transfers
// set up in configure_card(). xfr->user_data carries the BMUSBCapture
// instance. Isochronous data is decoded into the current audio or video
// frame, control results go into the register file, and the transfer is then
// submitted again.
550 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is reported and freed.
552 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
553 fprintf(stderr, "transfer status %d\n", xfr->status);
554 libusb_free_transfer(xfr);
558 assert(xfr->user_data != nullptr);
559 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
561 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio with a 20-byte text marker; the other
// call decodes video with the 00 00 ff ff marker.
562 if (xfr->endpoint == 0x84) {
563 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
565 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
568 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
569 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
570 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is only declared in the commented-out line above, so
// this printing section is presumably compiled out by lines not visible here.
572 if (setup->wIndex == 44) {
573 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
575 printf("read register %2d: 0x%02x%02x%02x%02x\n",
576 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes read and advance to the next register, wrapping to 0
// after the last one.
579 memcpy(usb->register_file + usb->current_register, buf, 4);
580 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
581 if (usb->current_register == 0) {
582 // read through all of them
583 printf("register dump:");
584 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
585 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-target the transfer's setup packet at the next register.
589 libusb_fill_control_setup(xfr->buffer,
590 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
591 /*index=*/usb->current_register, /*length=*/4);
596 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
597 for (i = 0; i < xfr->actual_length; i++) {
598 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb.
608 if (libusb_submit_transfer(xfr) < 0) {
609 fprintf(stderr, "error re-submitting URB\n");
// Body of the USB thread: requests SCHED_RR scheduling at priority 1 for the
// calling thread (printing a message if that fails), then runs libusb's event
// handling until should_quit is set.
614 void BMUSBCapture::usb_thread_func()
// NOTE(review): "¶m" on the next line and in the sched_setscheduler() call
// looks like a mis-encoded "&param" (the variable is called `param` just
// below) -- confirm against the original file.
617 memset(¶m, 0, sizeof(param));
618 param.sched_priority = 1;
619 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
620 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
622 while (!should_quit) {
623 int rc = libusb_handle_events(nullptr);
// How a non-success return is handled is not visible in this view.
624 if (rc != LIBUSB_SUCCESS)
// Prepares the card for capture. In order, the visible code:
//   - installs default MallocFrameAllocators if none were set,
//   - starts the dequeue thread,
//   - initializes libusb and opens the device by vid/pid,
//   - prints the interfaces, alternate settings and endpoints of config 0,
//   - selects configuration 1, claims interfaces 0 and 3 and switches
//     alternate settings on interface 0,
//   - issues a fixed list of vendor control requests (214 = read, 215 = write),
//   - sets up asynchronous control reads completed by cb_xfr(),
//   - opens audio.raw for the audio dump,
//   - allocates the isochronous transfers for endpoints 3 and 4 and stores
//     them in iso_xfrs (start_bm_capture() submits them).
// NOTE(review): the error checks between the visible lines (and what they do
// on failure) are mostly not visible in this view.
629 void BMUSBCapture::configure_card()
631 if (video_frame_allocator == nullptr) {
632 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
634 if (audio_frame_allocator == nullptr) {
635 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
637 dequeue_thread_should_quit = false;
638 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
641 struct libusb_transfer *xfr;
643 rc = libusb_init(nullptr);
645 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
649 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
650 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
651 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
653 fprintf(stderr, "Error finding USB device\n");
// Informational dump of the device's first configuration descriptor.
657 libusb_config_descriptor *config;
658 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
660 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
663 printf("%d interface\n", config->bNumInterfaces);
664 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
665 printf(" interface %d\n", interface_number);
666 const libusb_interface *interface = &config->interface[interface_number];
667 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
668 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
669 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
670 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
671 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
672 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
677 rc = libusb_set_configuration(devh, /*configuration=*/1);
679 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
683 rc = libusb_claim_interface(devh, 0);
685 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
689 // Alternate setting 1 is output, alternate setting 2 is input.
690 // Card is reset when switching alternates, so the driver uses
691 // this “double switch” when it wants to reset.
693 // There's also alternate settings 3 and 4, which seem to be
694 // like 1 and 2 except they advertise less bandwidth needed.
695 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
697 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
700 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
// NOTE(review): this message says "alternate 1" although the call above
// selects alternate setting 2 -- looks like a copy-paste slip in the text.
702 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// NOTE(review): a further switch back to alternate setting 1; the lines
// around it are not visible, so it may be compiled out -- confirm.
706 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
708 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
714 rc = libusb_claim_interface(devh, 3);
716 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
722 // 44 is some kind of timer register (first 16 bits count upwards)
723 // 24 is some sort of watchdog?
724 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
725 // (or will go to 0x73c60010?), also seen 0x73c60100
726 // 12 also changes all the time, unclear why
727 // 16 seems to be autodetected mode somehow
728 // -- this is e00115e0 after reset?
729 // ed0115e0 after mode change [to output?]
730 // 2d0015e0 after more mode change [to input]
731 // ed0115e0 after more mode change
732 // 2d0015e0 after more mode change
734 // 390115e0 seems to indicate we have signal
735 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
737 // 200015e0 on startup
738 // changes to 250115e0 when we sync to the signal
740 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
742 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
744 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
745 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
747 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
748 // perhaps some of them are related to analog output?
750 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
751 // but the driver sets it to 0x8036802a at some point.
753 // all of this is on request 214/215. other requests (192, 219,
754 // 222, 223, 224) are used for firmware upgrade. Probably best to
755 // stay out of it unless you know what you're doing.
759 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
762 // 0x01 - stable signal
764 // 0x08 - unknown (audio??)
// Startup control requests, issued in order by the loop below. Per the use in
// that loop the fields are: endpoint direction, request, index, data.
774 static const ctrl ctrls[] = {
775 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
776 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
778 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
780 // clearing the 0x08000000 bit seems to change the capture format (other source?)
781 // 0x10000000 = analog audio instead of embedded audio, it seems
782 // 0x3a000000 = component video? (analog audio)
783 // 0x3c000000 = composite video? (analog audio)
784 // 0x3e000000 = s-video? (analog audio)
785 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
786 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
787 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
788 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
789 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
792 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The 32-bit data word is sent most-significant byte first.
793 uint32_t flipped = htonl(ctrls[req].data);
794 static uint8_t value[4];
795 memcpy(value, &flipped, sizeof(flipped));
796 int size = sizeof(value);
797 //if (ctrls[req].request == 215) size = 0;
// Synchronous transfer; timeout 0 is passed.
798 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
799 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
801 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// rc is used as the number of bytes to print from `value`.
805 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
806 for (int i = 0; i < rc; ++i) {
807 printf("%02x", value[i]);
// Synchronous read of the register at my_index. NOTE(review): the lines
// around this section are not visible, so it may be compiled out -- confirm.
815 static int my_index = 0;
816 static uint8_t value[4];
817 int size = sizeof(value);
818 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
819 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
821 fprintf(stderr, "Error on control\n");
824 printf("rc=%d index=%d: 0x", rc, my_index);
825 for (int i = 0; i < rc; ++i) {
826 printf("%02x", value[i]);
833 // set up an asynchronous transfer of the timer register
834 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
835 static int completed = 0;
837 xfr = libusb_alloc_transfer(0);
838 libusb_fill_control_setup(cmdbuf,
839 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
840 /*index=*/44, /*length=*/4);
// The fill call stores &completed as user_data, but the next line replaces it
// with `this`, which is what cb_xfr() casts user_data to.
841 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
842 xfr->user_data = this;
// The return value of the submit is not checked.
843 libusb_submit_transfer(xfr);
845 // set up an asynchronous transfer of register 24
846 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
847 static int completed2 = 0;
849 xfr = libusb_alloc_transfer(0);
850 libusb_fill_control_setup(cmdbuf2,
851 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
852 /*index=*/24, /*length=*/4);
853 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
854 xfr->user_data = this;
855 libusb_submit_transfer(xfr);
858 // set up an asynchronous transfer of the register dump
859 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
860 static int completed3 = 0;
862 xfr = libusb_alloc_transfer(0);
863 libusb_fill_control_setup(cmdbuf3,
864 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
865 /*index=*/current_register, /*length=*/4);
866 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
867 xfr->user_data = this;
// The submit is commented out, so this transfer is prepared but never started
// by the visible code.
868 //libusb_submit_transfer(xfr);
// Output stream used by dump_audio_block().
870 audiofp = fopen("audio.raw", "wb");
872 // set up isochronous transfers for audio and video
// Endpoints 3 and 4 (IN), six transfers each.
873 for (int e = 3; e <= 4; ++e) {
874 //int num_transfers = (e == 3) ? 6 : 6;
875 int num_transfers = 6;
876 for (int i = 0; i < num_transfers; ++i) {
877 int num_iso_pack, size;
879 // Video seems to require isochronous packets scaled with the width;
880 // seemingly six lines is about right, rounded up to the required 1kB
882 size = WIDTH * 2 * 6;
883 // Note that for 10-bit input, you'll need to increase size accordingly.
884 //size = size * 4 / 3;
885 if (size % 1024 != 0) {
889 num_iso_pack = (2 << 18) / size; // 512 kB.
890 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer holds all packets of this transfer.
895 int num_bytes = num_iso_pack * size;
896 uint8_t *buf = new uint8_t[num_bytes];
898 xfr = libusb_alloc_transfer(num_iso_pack);
900 fprintf(stderr, "oom\n");
904 int ep = LIBUSB_ENDPOINT_IN | e;
905 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
906 num_iso_pack, cb_xfr, nullptr, 0);
907 libusb_set_iso_packet_lengths(xfr, size);
908 xfr->user_data = this;
// Kept for start_bm_capture(), which submits them.
909 iso_xfrs.push_back(xfr);
// Starts streaming by submitting every isochronous transfer that
// configure_card() stored in iso_xfrs, reporting any submit error on stderr.
// NOTE(review): the trailing libusb_release_interface()/libusb_exit() calls
// are shown without their surrounding lines; whether they run on the normal
// path or are compiled out cannot be told from this view.
914 void BMUSBCapture::start_bm_capture()
916 printf("starting capture\n");
918 for (libusb_transfer *xfr : iso_xfrs) {
919 printf("submitting transfer...\n");
920 int rc = libusb_submit_transfer(xfr);
923 //printf("num_bytes=%d\n", num_bytes);
924 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
925 xfr->endpoint, i, libusb_error_name(rc));
932 libusb_release_interface(devh, 0);
936 libusb_exit(nullptr);
// Asks the dequeue thread to stop, wakes it from its condition-variable wait
// and blocks until it has exited.
// NOTE(review): the flag is written without holding queue_lock; unless it is
// an atomic and the waiter's predicate check cannot race with this write, the
// notification could be missed -- confirm.
941 void BMUSBCapture::stop_dequeue_thread()
943 dequeue_thread_should_quit = true;
944 queues_not_empty.notify_all();
945 dequeue_thread.join();
// Launches the USB event-handling thread running usb_thread_func().
// NOTE(review): no object pointer is passed to the thread constructor, which
// only works if usb_thread_func is a static member -- confirm against the
// class declaration.
948 void BMUSBCapture::start_bm_thread()
951 usb_thread = thread(&BMUSBCapture::usb_thread_func);
954 void BMUSBCapture::stop_bm_thread()