1 // TODO: Replace with linking to upstream bmusb.
3 // Intensity Shuttle USB3 prototype capture driver, v0.3
4 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
5 // (can do captures for hours at a time with no drops), except during startup
6 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
7 // Audio comes out as 8-channel 24-bit raw audio.
12 #include <netinet/in.h>
19 #include <immintrin.h>
25 #include <condition_variable>
36 using namespace std::placeholders;
39 #define HEIGHT 750 /* 30 lines ancillary data? */
41 //#define HEIGHT 1125 /* ??? lines ancillary data? */
42 #define HEADER_SIZE 44
43 //#define HEADER_SIZE 0
44 #define AUDIO_HEADER_SIZE 4
46 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
47 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
48 #define FRAME_SIZE (8 << 20)
// Global stop flag: the USB event loop in usb_thread_func() runs for as long
// as this reads false.
53 atomic<bool> should_quit;
// Out-of-line, empty destructor for the FrameAllocator base class.
55 FrameAllocator::~FrameAllocator() {}
// Number of buffers the MallocFrameAllocator constructor pre-allocates.
57 #define NUM_QUEUED_FRAMES 16
// FrameAllocator implementation that hands out buffers from a pre-filled
// free list rather than allocating one per frame.
// NOTE(review): the access specifiers and the remaining members (frame_size,
// freelist_mutex, ...) are not visible in this excerpt.
58 class MallocFrameAllocator : public FrameAllocator {
60 MallocFrameAllocator(size_t frame_size);
61 Frame alloc_frame() override;
62 void release_frame(Frame frame) override;
// Buffers currently available to alloc_frame().
68 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Pre-allocates NUM_QUEUED_FRAMES buffers of frame_size bytes each and puts
// them on the free list.
71 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
72 : frame_size(frame_size)
74 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
75 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the free list; freelist_mutex is held while the
// list is touched. When the list is empty a "Frame overrun" message is
// printed; what the function returns in that case is not visible in this
// excerpt.
79 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
84 unique_lock<mutex> lock(freelist_mutex); // Meh.
85 if (freelist.empty()) {
86 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership of the buffer moves from the unique_ptr on the free list to the
// raw pointer stored in the frame; release_frame() wraps it again.
89 vf.data = freelist.top().release();
91 freelist.pop(); // Meh.
// Returns a buffer to the free list: frame.data is re-wrapped in a unique_ptr
// and pushed while freelist_mutex is held.
96 void MallocFrameAllocator::release_frame(Frame frame)
98 unique_lock<mutex> lock(freelist_mutex);
99 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that may wrap around. Both visible return
// statements yield true when b lies fewer than 0x8000 steps ahead of a; the
// second one first adds 0x10000 to b, i.e. it treats b as having wrapped.
// NOTE(review): the conditions that select between the two returns (and the
// result for a == b) are not visible in this excerpt.
102 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
107 return (b - a < 0x8000);
109 int wrap_b = 0x10000 + int(b);
110 return (wrap_b - a < 0x8000);
// Appends a finished block to the queue *q and wakes the dequeue thread.
//   format, timecode - values stored with the block (timecode goes into
//                      QueuedFrame::timecode).
//   frame            - the buffer holding the block.
//   q                - destination queue (pending video or audio frames).
// A block whose timecode does not come after the timecode at the back of the
// queue is reported as "going backwards" and its buffer is handed back to
// frame.owner instead of being queued.
// NOTE(review): the q->empty()/q->back() check appears before queue_lock is
// taken — confirm this cannot race with the dequeue thread.
114 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
116 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
117 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
118 q->back().timecode, timecode);
119 frame.owner->release_frame(frame);
125 qf.timecode = timecode;
129 unique_lock<mutex> lock(queue_lock);
130 q->push_back(move(qf));
132 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes frame_len - HEADER_SIZE bytes, starting HEADER_SIZE
// bytes into frame_start, to a newly created file called filename. Prints
// "short write!" when fwrite() does not report one complete item.
// NOTE(review): no check of the fopen() result is visible before fp is used,
// and frame_len < HEADER_SIZE would make the size_t subtraction wrap —
// confirm callers never hit either case.
135 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
137 FILE *fp = fopen(filename, "wb");
138 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
139 printf("short write!\n");
// Debug helper: appends one audio block, minus its AUDIO_HEADER_SIZE-byte
// header, to the stream audiofp (opened as "audio.raw" in configure_card()).
// The fwrite() result is not checked.
144 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
146 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Worker loop that pairs queued video and audio blocks by timecode and hands
// each matched pair to frame_callback.
//
// The optional dequeue_init_callback / dequeue_cleanup_callback are invoked
// when has_dequeue_callbacks is set. Each iteration waits until both queues
// are non-empty (or shutdown is requested) and compares the timecodes at the
// two queue fronts: the block that is behind has no partner and is dropped
// (its buffer goes back to its allocator); when neither is behind, both are
// popped and delivered.
//
// Timecodes are 16-bit counters that wrap, so "behind" is decided with
// uint16_less_than_with_wraparound(), the same helper queue_frame() uses,
// rather than a plain '<', which mis-orders blocks around the 0xffff -> 0x0000
// transition.
// NOTE(review): this relies on the helper being a strict less-than (false for
// equal timecodes), as its name says — confirm.
149 void BMUSBCapture::dequeue_thread_func()
151 if (has_dequeue_callbacks) {
152 dequeue_init_callback();
154 while (!dequeue_thread_should_quit) {
155 unique_lock<mutex> lock(queue_lock);
156 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
// The predicate also wakes us for shutdown, when either queue may still be
// empty; front() on an empty deque is undefined, so leave before touching it.
if (dequeue_thread_should_quit) break;
158 uint16_t video_timecode = pending_video_frames.front().timecode;
159 uint16_t audio_timecode = pending_audio_frames.front().timecode;
160 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
161 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
163 video_frame_allocator->release_frame(pending_video_frames.front().frame);
164 pending_video_frames.pop_front();
165 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
166 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
168 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
169 pending_audio_frames.pop_front();
// Neither block is behind the other: take both off their queues.
171 QueuedFrame video_frame = pending_video_frames.front();
172 QueuedFrame audio_frame = pending_audio_frames.front();
173 pending_audio_frames.pop_front();
174 pending_video_frames.pop_front();
179 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
180 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
181 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The callback receives the header sizes as offsets alongside each frame.
184 frame_callback(video_timecode,
185 video_frame.frame, HEADER_SIZE, video_frame.format,
186 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
189 if (has_dequeue_callbacks) {
190 dequeue_cleanup_callback();
// Called by decode_packs() with a pointer just past a video sync marker.
// Reads two little-endian 16-bit fields from the first four bytes (timecode
// from bytes 0-1, format from bytes 2-3). If the video frame being assembled
// already holds data, it is queued together with those values; then a fresh
// buffer is taken from video_frame_allocator for the next frame.
194 void BMUSBCapture::start_new_frame(const uint8_t *start)
196 uint16_t format = (start[3] << 8) | start[2];
197 uint16_t timecode = (start[1] << 8) | start[0];
199 if (current_video_frame.len > 0) {
201 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
203 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
205 // //start[7], start[6], start[5], start[4],
206 // read_current_frame, FRAME_SIZE);
208 current_video_frame = video_frame_allocator->alloc_frame();
209 //if (current_video_frame.data == nullptr) {
210 // read_current_frame = -1;
212 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): called with a pointer just past an
// audio sync marker. Reads the little-endian timecode (bytes 0-1) and format
// (bytes 2-3), queues the audio block assembled so far if it holds any data,
// and takes a fresh buffer from audio_frame_allocator.
216 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
218 uint16_t format = (start[3] << 8) | start[2];
219 uint16_t timecode = (start[1] << 8) | start[0];
220 if (current_audio_frame.len > 0) {
221 //dump_audio_block();
222 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
224 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
225 // format, timecode, read_current_audio_block);
226 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the pack->actual_length bytes of one isochronous
// packet, found at xfr->buffer + offset, as two-digit hex values.
// NOTE(review): the separator logic around the (j % 8) test is only partly
// visible in this excerpt.
230 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
232 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
233 for (unsigned j = 0; j < pack->actual_length; j++) {
234 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
235 printf("%02x", xfr->buffer[j + offset]);
238 else if ((j % 8) == 7)
// Copies n bytes from src into the two destinations dest1 and dest2, walking
// the input two bytes per iteration.
// NOTE(review): the loop body is not visible here; presumably even-indexed
// bytes go to dest1 and odd-indexed bytes to dest2, and n is expected to be
// even (add_to_frame() deals with an odd byte before calling) — confirm.
246 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
249 uint8_t *dptr1 = dest1;
250 uint8_t *dptr2 = dest2;
252 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to *current_frame and advances its len.
//   current_frame   - frame being assembled; when it has no buffer or is
//                     already past its size, the first test below fires (the
//                     action taken is not visible in this excerpt).
//   frame_type_name - only used in the overflow message.
// If the new bytes would not fit, the overflow amount is printed. For
// interleaved frames the data is split between data and data2, each written at
// offset len / 2, with odd current lengths and odd byte counts handled before
// the bulk copy; otherwise it is a plain memcpy at offset len.
258 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
260 if (current_frame->data == nullptr ||
261 current_frame->len > current_frame->size ||
266 int bytes = end - start;
267 if (current_frame->len + bytes > current_frame->size) {
268 printf("%d bytes overflow after last %s frame\n",
269 int(current_frame->len + bytes - current_frame->size), frame_type_name);
272 if (current_frame->interleaved) {
273 uint8_t *data = current_frame->data + current_frame->len / 2;
274 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
275 if (current_frame->len % 2 == 1) {
279 if (bytes % 2 == 1) {
282 ++current_frame->len;
285 memcpy_interleaved(data, data2, start, bytes);
286 current_frame->len += bytes;
288 memcpy(current_frame->data + current_frame->len, start, bytes);
289 current_frame->len += bytes;
// Debug helper: prints name (left-aligned in a 10-character field) followed by
// the 32 bytes of n, lowest index first, as two-digit hex values. The extract
// calls are written out one by one because _mm256_extract_epi8 takes its index
// as a literal constant.
297 void avx2_dump(const char *name, __m256i n)
299 printf("%-10s:", name);
300 printf(" %02x", _mm256_extract_epi8(n, 0));
301 printf(" %02x", _mm256_extract_epi8(n, 1));
302 printf(" %02x", _mm256_extract_epi8(n, 2));
303 printf(" %02x", _mm256_extract_epi8(n, 3));
304 printf(" %02x", _mm256_extract_epi8(n, 4));
305 printf(" %02x", _mm256_extract_epi8(n, 5));
306 printf(" %02x", _mm256_extract_epi8(n, 6));
307 printf(" %02x", _mm256_extract_epi8(n, 7));
309 printf(" %02x", _mm256_extract_epi8(n, 8));
310 printf(" %02x", _mm256_extract_epi8(n, 9));
311 printf(" %02x", _mm256_extract_epi8(n, 10));
312 printf(" %02x", _mm256_extract_epi8(n, 11));
313 printf(" %02x", _mm256_extract_epi8(n, 12));
314 printf(" %02x", _mm256_extract_epi8(n, 13));
315 printf(" %02x", _mm256_extract_epi8(n, 14));
316 printf(" %02x", _mm256_extract_epi8(n, 15));
318 printf(" %02x", _mm256_extract_epi8(n, 16));
319 printf(" %02x", _mm256_extract_epi8(n, 17));
320 printf(" %02x", _mm256_extract_epi8(n, 18));
321 printf(" %02x", _mm256_extract_epi8(n, 19));
322 printf(" %02x", _mm256_extract_epi8(n, 20));
323 printf(" %02x", _mm256_extract_epi8(n, 21));
324 printf(" %02x", _mm256_extract_epi8(n, 22));
325 printf(" %02x", _mm256_extract_epi8(n, 23));
327 printf(" %02x", _mm256_extract_epi8(n, 24));
328 printf(" %02x", _mm256_extract_epi8(n, 25));
329 printf(" %02x", _mm256_extract_epi8(n, 26));
330 printf(" %02x", _mm256_extract_epi8(n, 27));
331 printf(" %02x", _mm256_extract_epi8(n, 28));
332 printf(" %02x", _mm256_extract_epi8(n, 29));
333 printf(" %02x", _mm256_extract_epi8(n, 30));
334 printf(" %02x", _mm256_extract_epi8(n, 31));
339 // Does a memcpy and memchr in one to reduce processing time.
340 // Note that the benefit is somewhat limited if your L3 cache is small,
341 // as you'll (unfortunately) spend most of the time loading the data
344 // Complicated cases are left to the slow path; it basically stops copying
345 // up until the first instance of "sync_char" (usually a bit before, actually).
346 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
347 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
//   current_frame - frame being appended to (plain or interleaved layout).
//   start, limit  - input range to consume.
//   sync_char     - byte whose presence ends the fast copy; decode_packs()
//                   passes the first byte of the sync pattern.
// Returns a pointer into the input range; decode_packs() resumes its slower
// search from there.
// NOTE(review): this excerpt shows an AVX2 (__m256i) and an SSE (__m128i)
// version of the copy loops; the conditional that selects between them, the
// loop increments and the early-return statements are not visible.
348 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
350 if (current_frame->data == nullptr ||
351 current_frame->len > current_frame->size ||
355 size_t orig_bytes = limit - start;
356 if (orig_bytes < 128) {
361 // Don't read more bytes than we can write.
362 limit = min(limit, start + (current_frame->size - current_frame->len));
364 // Align end to 32 bytes.
365 limit = (const uint8_t *)(intptr_t(limit) & ~31);
367 if (start >= limit) {
371 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
372 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
373 if (aligned_start != start) {
// The unaligned head goes through the regular add_to_frame(); if it already
// contains sync_char, only the bytes before it are appended.
374 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
375 if (sync_start == nullptr) {
376 add_to_frame(current_frame, "", start, aligned_start);
378 add_to_frame(current_frame, "", start, sync_start);
383 // Make the length a multiple of 64.
384 if (current_frame->interleaved) {
385 if (((limit - aligned_start) % 64) != 0) {
388 assert(((limit - aligned_start) % 64) == 0);
// AVX2 version: 32-byte vectors.
392 const __m256i needle = _mm256_set1_epi8(sync_char);
394 const __restrict __m256i *in = (const __m256i *)aligned_start;
395 if (current_frame->interleaved) {
396 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
397 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
398 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: gathers the even-indexed bytes into the low 8 bytes and
// the odd-indexed bytes into the high 8 bytes.
402 __m256i shuffle_cw = _mm256_set_epi8(
403 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
404 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
405 while (in < (const __m256i *)limit) {
406 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
407 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
408 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
410 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
411 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
412 __m256i found = _mm256_or_si256(found1, found2);
414 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
415 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
417 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
418 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo takes the low 128-bit halves of data1 and data2, hi the high halves.
420 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
421 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
423 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
424 _mm256_storeu_si256(out2, hi);
426 if (!_mm256_testz_si256(found, found)) {
434 current_frame->len += (uint8_t *)in - aligned_start;
436 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
437 while (in < (const __m256i *)limit) {
438 __m256i data = _mm256_load_si256(in);
439 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
440 __m256i found = _mm256_cmpeq_epi8(data, needle);
441 if (!_mm256_testz_si256(found, found)) {
448 current_frame->len = (uint8_t *)out - current_frame->data;
// SSE version: 16-byte vectors, same structure as above.
451 const __m128i needle = _mm_set1_epi8(sync_char);
453 const __m128i *in = (const __m128i *)aligned_start;
454 if (current_frame->interleaved) {
455 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
456 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
457 if (current_frame->len % 2 == 1) {
461 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
462 while (in < (const __m128i *)limit) {
463 __m128i data1 = _mm_load_si128(in);
464 __m128i data2 = _mm_load_si128(in + 1);
// Low byte of every 16-bit word (mask) and high byte (shift) are separated,
// then packed back down to bytes.
465 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
466 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
467 __m128i data1_hi = _mm_srli_epi16(data1, 8);
468 __m128i data2_hi = _mm_srli_epi16(data2, 8);
469 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
470 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
471 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
472 _mm_storeu_si128(out2, hi);
473 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
474 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
475 if (!_mm_testz_si128(found1, found1) ||
476 !_mm_testz_si128(found2, found2)) {
484 current_frame->len += (uint8_t *)in - aligned_start;
486 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
487 while (in < (const __m128i *)limit) {
488 __m128i data = _mm_load_si128(in);
489 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
490 __m128i found = _mm_cmpeq_epi8(data, needle);
491 if (!_mm_testz_si128(found, found)) {
498 current_frame->len = (uint8_t *)out - current_frame->data;
502 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
504 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer and feeds its payload
// into *current_frame, splitting at each occurrence of the sync pattern.
//   xfr             - the completed transfer; packet i starts at the running
//                     sum of the preceding packets' length fields.
//   sync_pattern    - byte sequence marking the start of a new frame/block
//                     (sync_length bytes long).
//   current_frame   - frame currently being assembled.
//   frame_type_name - passed on to add_to_frame() for its messages.
//   start_callback  - invoked with a pointer just past each sync pattern found.
// Packets whose status is not LIBUSB_TRANSFER_COMPLETED are reported on stderr.
// NOTE(review): the callbacks bound in cb_xfr() read four bytes at the pointer
// they receive; no check that four bytes remain before limit is visible here.
508 void decode_packs(const libusb_transfer *xfr,
509 const char *sync_pattern,
511 FrameAllocator::Frame *current_frame,
512 const char *frame_type_name,
513 function<void(const uint8_t *start)> start_callback)
516 for (int i = 0; i < xfr->num_iso_packets; i++) {
517 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
519 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
520 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
525 const uint8_t *start = xfr->buffer + offset;
526 const uint8_t *limit = start + pack->actual_length;
527 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible first; the fast path returns where it stopped.
529 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
530 if (start == limit) break;
531 assert(start < limit);
// Exact search for the full sync pattern in what is left of the packet.
534 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
535 if (start_next_frame == nullptr) {
536 // add the rest of the buffer
537 add_to_frame(current_frame, frame_type_name, start, limit);
540 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
541 start = start_next_frame + sync_length; // skip sync
542 start_callback(start);
546 dump_pack(xfr, offset, pack);
// Advance by the packet's allotted length, not by the bytes actually received.
548 offset += pack->length;
// libusb completion callback shared by the isochronous and control transfers.
// The owning BMUSBCapture is recovered from xfr->user_data.
//  - A transfer whose status is not LIBUSB_TRANSFER_COMPLETED is reported on
//    stderr and freed.
//  - Isochronous transfers are decoded with decode_packs(): endpoint 0x84 as
//    audio (20-byte "DeckLinkAudioResyncT" marker), anything else as video
//    (4-byte 00 00 ff ff marker).
//  - Control transfers carry a 4-byte register read that is stored in
//    register_file; current_register then advances by 4, wrapping at
//    NUM_BMUSB_REGISTERS, and the setup packet is refilled for the next one.
//  - Finally the transfer is submitted again.
// NOTE(review): `setup` is used below while its declaration is commented out;
// presumably those lines are compiled out by a conditional not visible here.
552 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
554 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
555 fprintf(stderr, "transfer status %d\n", xfr->status);
556 libusb_free_transfer(xfr);
560 assert(xfr->user_data != nullptr);
561 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
563 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
564 if (xfr->endpoint == 0x84) {
565 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
567 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
570 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
571 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
572 uint8_t *buf = libusb_control_transfer_get_data(xfr);
574 if (setup->wIndex == 44) {
575 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
577 printf("read register %2d: 0x%02x%02x%02x%02x\n",
578 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
581 memcpy(usb->register_file + usb->current_register, buf, 4);
582 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
583 if (usb->current_register == 0) {
584 // read through all of them
585 printf("register dump:");
586 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
587 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the same transfer to read the next register.
591 libusb_fill_control_setup(xfr->buffer,
592 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
593 /*index=*/usb->current_register, /*length=*/4);
598 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
599 for (i = 0; i < xfr->actual_length; i++) {
600 printf("%02x", xfr->buffer[i]);
610 if (libusb_submit_transfer(xfr) < 0) {
611 fprintf(stderr, "error re-submitting URB\n");
// Body of the USB event thread: asks for SCHED_RR priority 1 for the calling
// thread, then pumps libusb events until the global should_quit flag is set.
// Failing to get realtime priority is only reported; the loop still runs.
//
// Fix: the address-of expressions had been mangled into a pilcrow ("&para" was
// decoded as an HTML entity); they are restored to &param.
616 void BMUSBCapture::usb_thread_func()
619 memset(&param, 0, sizeof(param));
620 param.sched_priority = 1;
621 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
622 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
624 while (!should_quit) {
625 int rc = libusb_handle_events(nullptr);
626 if (rc != LIBUSB_SUCCESS)
// One-time device setup. In order, the visible code:
//  - installs default MallocFrameAllocators for video and audio when none has
//    been set, and starts the dequeue thread;
//  - initialises libusb and opens the device identified by vid/pid;
//  - prints the interfaces, alternate settings and endpoints of configuration
//    descriptor 0;
//  - selects configuration 1, claims interface 0, cycles its alternate
//    settings and claims interface 3;
//  - issues the synchronous vendor control transfers listed in ctrls[];
//  - prepares asynchronous control transfers for register reads;
//  - opens "audio.raw" for the audio dump helper;
//  - allocates six isochronous transfers for each of endpoints 3 and 4 and
//    stores them in iso_xfrs (start_bm_capture() submits them).
// NOTE(review): the checks of rc/devh/xfr that guard the error messages, and
// what happens after an error, are not visible in this excerpt. The iso
// buffers are allocated with new[] and no matching release is visible.
//
// Fix: the error message after switching to alternate setting 2 used to say
// "alternate 1".
631 void BMUSBCapture::configure_card()
633 if (video_frame_allocator == nullptr) {
634 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
636 if (audio_frame_allocator == nullptr) {
637 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
639 dequeue_thread_should_quit = false;
640 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
643 struct libusb_transfer *xfr;
645 rc = libusb_init(nullptr);
647 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
651 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
652 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
653 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
655 fprintf(stderr, "Error finding USB device\n");
659 libusb_config_descriptor *config;
660 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
662 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of the descriptor tree.
665 printf("%d interface\n", config->bNumInterfaces);
666 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
667 printf(" interface %d\n", interface_number);
668 const libusb_interface *interface = &config->interface[interface_number];
669 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
670 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
671 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
672 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
673 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
674 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
679 rc = libusb_set_configuration(devh, /*configuration=*/1);
681 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
685 rc = libusb_claim_interface(devh, 0);
687 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
691 // Alternate setting 1 is output, alternate setting 2 is input.
692 // Card is reset when switching alternates, so the driver uses
693 // this “double switch” when it wants to reset.
695 // There's also alternate settings 3 and 4, which seem to be
696 // like 1 and 2 except they advertise less bandwidth needed.
697 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
699 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
702 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
704 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
708 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
710 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
716 rc = libusb_claim_interface(devh, 3);
718 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
724 // 44 is some kind of timer register (first 16 bits count upwards)
725 // 24 is some sort of watchdog?
726 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
727 // (or will go to 0x73c60010?), also seen 0x73c60100
728 // 12 also changes all the time, unclear why
729 // 16 seems to be autodetected mode somehow
730 // -- this is e00115e0 after reset?
731 // ed0115e0 after mode change [to output?]
732 // 2d0015e0 after more mode change [to input]
733 // ed0115e0 after more mode change
734 // 2d0015e0 after more mode change
736 // 390115e0 seems to indicate we have signal
737 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
739 // 200015e0 on startup
740 // changes to 250115e0 when we sync to the signal
742 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
744 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
746 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
747 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
749 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
750 // perhaps some of them are related to analog output?
752 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
753 // but the driver sets it to 0x8036802a at some point.
755 // all of this is on request 214/215. other requests (192, 219,
756 // 222, 223, 224) are used for firmware upgrade. Probably best to
757 // stay out of it unless you know what you're doing.
761 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
764 // 0x01 - stable signal
766 // 0x08 - unknown (audio??)
// Each entry: direction, vendor request, register index, 32-bit value.
776 static const ctrl ctrls[] = {
777 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
778 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
780 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
782 // clearing the 0x08000000 bit seems to change the capture format (other source?)
783 // 0x10000000 = analog audio instead of embedded audio, it seems
784 // 0x3a000000 = component video? (analog audio)
785 // 0x3c000000 = composite video? (analog audio)
786 // 0x3e000000 = s-video? (analog audio)
787 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
788 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
789 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
790 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
791 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
794 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The value goes over the wire in network (big-endian) byte order.
795 uint32_t flipped = htonl(ctrls[req].data);
796 static uint8_t value[4];
797 memcpy(value, &flipped, sizeof(flipped));
798 int size = sizeof(value);
799 //if (ctrls[req].request == 215) size = 0;
800 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
801 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
803 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
807 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
808 for (int i = 0; i < rc; ++i) {
809 printf("%02x", value[i]);
817 static int my_index = 0;
818 static uint8_t value[4];
819 int size = sizeof(value);
820 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
821 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
823 fprintf(stderr, "Error on control\n");
826 printf("rc=%d index=%d: 0x", rc, my_index);
827 for (int i = 0; i < rc; ++i) {
828 printf("%02x", value[i]);
835 // set up an asynchronous transfer of the timer register
836 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
837 static int completed = 0;
839 xfr = libusb_alloc_transfer(0);
840 libusb_fill_control_setup(cmdbuf,
841 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
842 /*index=*/44, /*length=*/4);
843 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// cb_xfr() expects user_data to be the BMUSBCapture instance, so the pointer
// passed to libusb_fill_control_transfer() above is replaced here.
844 xfr->user_data = this;
845 libusb_submit_transfer(xfr);
847 // set up an asynchronous transfer of register 24
848 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
849 static int completed2 = 0;
851 xfr = libusb_alloc_transfer(0);
852 libusb_fill_control_setup(cmdbuf2,
853 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
854 /*index=*/24, /*length=*/4);
855 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
856 xfr->user_data = this;
857 libusb_submit_transfer(xfr);
860 // set up an asynchronous transfer of the register dump
861 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
862 static int completed3 = 0;
864 xfr = libusb_alloc_transfer(0);
865 libusb_fill_control_setup(cmdbuf3,
866 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
867 /*index=*/current_register, /*length=*/4);
868 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
869 xfr->user_data = this;
870 //libusb_submit_transfer(xfr);
// Stream written by dump_audio_block(). NOTE(review): no check of the fopen()
// result is visible here.
872 audiofp = fopen("audio.raw", "wb");
874 // set up isochronous transfers for audio and video
875 for (int e = 3; e <= 4; ++e) {
876 //int num_transfers = (e == 3) ? 6 : 6;
877 int num_transfers = 6;
878 for (int i = 0; i < num_transfers; ++i) {
879 int num_iso_pack, size;
881 // Video seems to require isochronous packets scaled with the width;
882 // seemingly six lines is about right, rounded up to the required 1kB
884 size = WIDTH * 2 * 6;
885 // Note that for 10-bit input, you'll need to increase size accordingly.
886 //size = size * 4 / 3;
887 if (size % 1024 != 0) {
891 num_iso_pack = (2 << 18) / size; // 512 kB.
892 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
897 int num_bytes = num_iso_pack * size;
898 uint8_t *buf = new uint8_t[num_bytes];
900 xfr = libusb_alloc_transfer(num_iso_pack);
902 fprintf(stderr, "oom\n");
906 int ep = LIBUSB_ENDPOINT_IN | e;
907 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
908 num_iso_pack, cb_xfr, nullptr, 0);
909 libusb_set_iso_packet_lengths(xfr, size);
910 xfr->user_data = this;
911 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer prepared by configure_card() (the
// contents of iso_xfrs) and reports any submission that fails, naming the
// endpoint and the libusb error.
// NOTE(review): the declaration of `i`, the rc check, and whether the trailing
// libusb_release_interface()/libusb_exit() calls are compiled in are not
// visible in this excerpt.
916 void BMUSBCapture::start_bm_capture()
918 printf("starting capture\n");
920 for (libusb_transfer *xfr : iso_xfrs) {
921 printf("submitting transfer...\n");
922 int rc = libusb_submit_transfer(xfr);
925 //printf("num_bytes=%d\n", num_bytes);
926 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
927 xfr->endpoint, i, libusb_error_name(rc));
934 libusb_release_interface(devh, 0);
938 libusb_exit(nullptr);
// Asks the dequeue thread to stop and waits for it: sets the quit flag, wakes
// the thread in case it is blocked on queues_not_empty, then joins it.
943 void BMUSBCapture::stop_dequeue_thread()
945 dequeue_thread_should_quit = true;
946 queues_not_empty.notify_all();
947 dequeue_thread.join();
// Starts the USB event thread running usb_thread_func().
// NOTE(review): usb_thread_func is passed without an object, so it is
// presumably a static member — confirm against the class declaration.
950 void BMUSBCapture::start_bm_thread()
953 usb_thread = thread(&BMUSBCapture::usb_thread_func);
956 void BMUSBCapture::stop_bm_thread()