1 // TODO: Replace with linking to upstream bmusb.
3 // Intensity Shuttle USB3 prototype capture driver, v0.3
4 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
5 // (can do captures for hours at a time with no drops), except during startup
6 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
7 // Audio comes out as 8-channel 24-bit raw audio.
12 #include <netinet/in.h>
19 #include <immintrin.h>
25 #include <condition_variable>
36 using namespace std::placeholders;
// Compile-time capture constants. The commented-out alternatives are kept as-is for reference.
39 #define HEIGHT 750 /* 30 lines ancillary data? */
41 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Number of leading bytes dump_frame() skips in a video frame; also passed to
// frame_callback as the video offset in dequeue_thread().
42 #define HEADER_SIZE 44
43 //#define HEADER_SIZE 0
// Same role for audio blocks (see dump_audio_block() and dequeue_thread()).
44 #define AUDIO_HEADER_SIZE 4
46 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
47 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Buffer size given to the default video MallocFrameAllocator in configure_card():
// 8 << 20 bytes, i.e. 8 MiB, independent of WIDTH/HEIGHT.
48 #define FRAME_SIZE (8 << 20)
// Loop-exit flag polled by usb_thread_func().
53 atomic<bool> should_quit;
// Out-of-line, empty destructor definition for the FrameAllocator base class.
55 FrameAllocator::~FrameAllocator() {}
// Number of buffers the MallocFrameAllocator constructor pre-allocates.
57 #define NUM_QUEUED_FRAMES 16
// FrameAllocator implementation that hands out buffers from a pre-allocated pool
// kept on a freelist. (Some member declarations are not visible in this view.)
58 class MallocFrameAllocator : public FrameAllocator {
// Pre-allocates NUM_QUEUED_FRAMES buffers of frame_size bytes each.
60 MallocFrameAllocator(size_t frame_size);
// Takes a buffer off the freelist; see the definition for the empty-pool case.
61 Frame alloc_frame() override;
// Puts frame.data back onto the freelist.
62 void release_frame(Frame frame) override;
// Buffers that are currently not handed out.
68 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Stores frame_size and fills the freelist with NUM_QUEUED_FRAMES freshly
// allocated buffers of that size.
71 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
72 : frame_size(frame_size)
74 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
75 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist while holding freelist_mutex.
// If the freelist is empty, an overrun message is printed instead of taking a buffer.
// (The construction and return of the Frame are on lines not visible in this view.)
79 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
84 unique_lock<mutex> lock(freelist_mutex); // Meh.
85 if (freelist.empty()) {
// NOTE(review): "%ld" is used for what is presumably the size_t frame_size
// (argument line not visible); "%zu" would be the matching conversion -- confirm.
86 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership moves from the unique_ptr to the raw pointer in vf.data; the
// now-empty unique_ptr is then popped off the stack.
89 vf.data = freelist.top().release();
91 freelist.pop(); // Meh.
// Returns a buffer to the pool: under freelist_mutex, frame.data is wrapped in a
// unique_ptr again and pushed onto the freelist.
96 void MallocFrameAllocator::release_frame(Frame frame)
98 unique_lock<mutex> lock(freelist_mutex);
99 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that may wrap around.
// Two result expressions are visible: one tests whether b - a is below 0x8000,
// the other does the same after adding 0x10000 to b. The condition that selects
// between them is on a line not visible in this view.
102 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
107 return (b - a < 0x8000);
// b is widened to int before adding 0x10000.
109 int wrap_b = 0x10000 + int(b);
110 return (wrap_b - a < 0x8000);
// Appends a finished frame to the queue *q and signals queues_not_empty.
//   format, timecode - values parsed from the stream header by the caller
//   frame            - buffer to queue; it is released back to frame.owner if rejected
//   q                - pending_video_frames or pending_audio_frames
// A frame is rejected (with a message) when the queue is non-empty and the last
// queued timecode is not "less than" the new one per
// uint16_less_than_with_wraparound().
114 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
116 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
117 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
118 q->back().timecode, timecode);
119 frame.owner->release_frame(frame);
125 qf.timecode = timecode;
// The push is done under queue_lock, the same mutex dequeue_thread() waits on.
129 unique_lock<mutex> lock(queue_lock);
130 q->push_back(move(qf));
132 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes one frame, minus its first HEADER_SIZE bytes, to `filename`
// as a single fwrite() record, and prints "short write!" if that record was not written.
// NOTE(review): the fopen() result is used by fwrite() on the very next line
// without a null check; frame_len < HEADER_SIZE would also underflow the size_t
// subtraction. Confirm whether callers guarantee both.
135 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
137 FILE *fp = fopen(filename, "wb");
138 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
139 printf("short write!\n");
// Debug helper: appends one audio block, minus its first AUDIO_HEADER_SIZE bytes,
// to audiofp (opened as "audio.raw" in configure_card()). The fwrite() result is ignored.
144 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
146 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the thread that configure_card() starts detached. It pairs queued video
// frames with queued audio blocks by timecode and passes each matched pair to
// frame_callback. (The enclosing loop and several braces are not visible in this view.)
149 void BMUSBCapture::dequeue_thread()
152 unique_lock<mutex> lock(queue_lock);
// Block until BOTH queues hold at least one entry.
153 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
155 uint16_t video_timecode = pending_video_frames.front().timecode;
156 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// On a mismatch, the entry with the numerically smaller timecode is dropped and
// its buffer is returned to the corresponding allocator.
// NOTE(review): this is a plain uint16_t comparison, whereas queue_frame() orders
// timecodes with uint16_less_than_with_wraparound(); confirm the intended behaviour
// when the counter wraps from 0xffff to 0x0000.
157 if (video_timecode < audio_timecode) {
158 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
160 video_frame_allocator->release_frame(pending_video_frames.front().frame);
161 pending_video_frames.pop_front();
162 } else if (audio_timecode < video_timecode) {
163 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
165 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
166 pending_audio_frames.pop_front();
// Equal timecodes: copy both front entries, then remove them from the queues.
168 QueuedFrame video_frame = pending_video_frames.front();
169 QueuedFrame audio_frame = pending_audio_frames.front();
170 pending_audio_frames.pop_front();
171 pending_video_frames.pop_front();
// Debug dump of the pair to disk; presumably conditionally compiled (the
// surrounding lines are not visible) -- TODO confirm.
176 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
177 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
178 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// Deliver the pair together with the header sizes as the data offsets.
181 frame_callback(video_timecode,
182 video_frame.frame, HEADER_SIZE, video_frame.format,
183 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Invoked through decode_packs() (bound in cb_xfr()) with `start` pointing just past
// a video sync marker. Reads format and timecode from the four bytes at `start`,
// queues the video frame accumulated so far (if it holds any data) tagged with those
// values, then takes a fresh buffer from the video allocator.
// NOTE(review): nothing visible here or at the call site checks that four bytes
// remain in the packet after the sync marker -- confirm.
188 void BMUSBCapture::start_new_frame(const uint8_t *start)
// Both fields are assembled low byte first (little-endian).
190 uint16_t format = (start[3] << 8) | start[2];
191 uint16_t timecode = (start[1] << 8) | start[0];
193 if (current_video_frame.len > 0) {
195 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
197 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
199 // //start[7], start[6], start[5], start[4],
200 // read_current_frame, FRAME_SIZE);
202 current_video_frame = video_frame_allocator->alloc_frame();
203 //if (current_video_frame.data == nullptr) {
204 // read_current_frame = -1;
206 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): invoked with `start` pointing just past an
// audio sync marker. Parses format and timecode (little-endian) from the four bytes
// at `start`, queues the audio block accumulated so far if it holds any data, and
// takes a fresh buffer from the audio allocator.
// NOTE(review): as in start_new_frame(), no check that four bytes are available is visible.
210 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
212 uint16_t format = (start[3] << 8) | start[2];
213 uint16_t timecode = (start[1] << 8) | start[0];
214 if (current_audio_frame.len > 0) {
215 //dump_audio_block();
216 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
218 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
219 // format, timecode, read_current_audio_block);
220 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//   xfr    - transfer whose buffer holds the packet data
//   offset - byte offset of this packet within xfr->buffer
//   pack   - descriptor giving the packet's actual_length
// Separators are emitted based on the byte index (every 8th byte is one visible
// case; the other cases are on lines not visible in this view).
224 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
226 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
227 for (unsigned j = 0; j < pack->actual_length; j++) {
228 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
229 printf("%02x", xfr->buffer[j + offset]);
232 else if ((j % 8) == 7)
// De-interleaving copy used by add_to_frame() for interleaved frames.
// Walks `src` two bytes per iteration for n bytes, with one write cursor per
// destination. The loop body is not visible in this view; presumably it writes
// alternate bytes to dest1 and dest2 -- TODO confirm.
240 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
243 uint8_t *dptr1 = dest1;
244 uint8_t *dptr2 = dest2;
246 for (size_t i = 0; i < n; i += 2) {
// Slow-path append: copies the bytes [start, end) onto the end of *current_frame.
//   current_frame   - destination; its `len` is advanced by the bytes stored
//   frame_type_name - used only in the overflow message
// A guard at the top rejects frames with no buffer or with len already past size
// (a further condition is on a line not visible in this view).
252 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
254 if (current_frame->data == nullptr ||
255 current_frame->len > current_frame->size ||
260 int bytes = end - start;
// Data that would not fit in the buffer is reported as an overflow.
261 if (current_frame->len + bytes > current_frame->size) {
262 printf("%d bytes overflow after last %s frame\n",
263 int(current_frame->len + bytes - current_frame->size), frame_type_name);
// Interleaved frames are split across two planes (data and data2), each indexed
// by len / 2; odd current lengths and odd byte counts are handled one byte at a
// time before the bulk memcpy_interleaved() call.
266 if (current_frame->interleaved) {
267 uint8_t *data = current_frame->data + current_frame->len / 2;
268 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
269 if (current_frame->len % 2 == 1) {
273 if (bytes % 2 == 1) {
276 ++current_frame->len;
279 memcpy_interleaved(data, data2, start, bytes);
280 current_frame->len += bytes;
// Non-interleaved frames are a plain contiguous append.
282 memcpy(current_frame->data + current_frame->len, start, bytes);
283 current_frame->len += bytes;
// Debug helper: prints `name` left-justified in a 10-character column followed by
// the 32 bytes of `n` as two-digit hex, in byte-index order 0..31.
// Each byte needs its own call because _mm256_extract_epi8 takes a constant index.
291 void avx2_dump(const char *name, __m256i n)
293 printf("%-10s:", name);
294 printf(" %02x", _mm256_extract_epi8(n, 0));
295 printf(" %02x", _mm256_extract_epi8(n, 1));
296 printf(" %02x", _mm256_extract_epi8(n, 2));
297 printf(" %02x", _mm256_extract_epi8(n, 3));
298 printf(" %02x", _mm256_extract_epi8(n, 4));
299 printf(" %02x", _mm256_extract_epi8(n, 5));
300 printf(" %02x", _mm256_extract_epi8(n, 6));
301 printf(" %02x", _mm256_extract_epi8(n, 7));
303 printf(" %02x", _mm256_extract_epi8(n, 8));
304 printf(" %02x", _mm256_extract_epi8(n, 9));
305 printf(" %02x", _mm256_extract_epi8(n, 10));
306 printf(" %02x", _mm256_extract_epi8(n, 11));
307 printf(" %02x", _mm256_extract_epi8(n, 12));
308 printf(" %02x", _mm256_extract_epi8(n, 13));
309 printf(" %02x", _mm256_extract_epi8(n, 14));
310 printf(" %02x", _mm256_extract_epi8(n, 15));
312 printf(" %02x", _mm256_extract_epi8(n, 16));
313 printf(" %02x", _mm256_extract_epi8(n, 17));
314 printf(" %02x", _mm256_extract_epi8(n, 18));
315 printf(" %02x", _mm256_extract_epi8(n, 19));
316 printf(" %02x", _mm256_extract_epi8(n, 20));
317 printf(" %02x", _mm256_extract_epi8(n, 21));
318 printf(" %02x", _mm256_extract_epi8(n, 22));
319 printf(" %02x", _mm256_extract_epi8(n, 23));
321 printf(" %02x", _mm256_extract_epi8(n, 24));
322 printf(" %02x", _mm256_extract_epi8(n, 25));
323 printf(" %02x", _mm256_extract_epi8(n, 26));
324 printf(" %02x", _mm256_extract_epi8(n, 27));
325 printf(" %02x", _mm256_extract_epi8(n, 28));
326 printf(" %02x", _mm256_extract_epi8(n, 29));
327 printf(" %02x", _mm256_extract_epi8(n, 30));
328 printf(" %02x", _mm256_extract_epi8(n, 31));
333 // Does a memcpy and memchr in one to reduce processing time.
334 // Note that the benefit is somewhat limited if your L3 cache is small,
335 // as you'll (unfortunately) spend most of the time loading the data
338 // Complicated cases are left to the slow path; the fast path simply stops copying
339 // at the first instance of "sync_char" (usually a bit before, actually).
340 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
341 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// SIMD fast path: copies bytes from [start, limit) into *current_frame while
// comparing every vector against sync_char.
//   current_frame - destination frame; `len` is updated with the bytes consumed
//   start, limit  - input range
//   sync_char     - byte that may begin a sync marker
// The visible return at the end yields the input position reached (`in`); the
// caller, decode_packs(), handles everything from there on the slow path.
// Two variants are visible: a 256-bit AVX2 one and a 128-bit SSE one. How one is
// selected is on lines not visible here (presumably a preprocessor conditional).
342 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same guard as in add_to_frame() (a further condition is not visible here).
344 if (current_frame->data == nullptr ||
345 current_frame->len > current_frame->size ||
349 size_t orig_bytes = limit - start;
// Inputs shorter than 128 bytes take an early exit (body not visible).
350 if (orig_bytes < 128) {
355 // Don't read more bytes than we can write.
356 limit = min(limit, start + (current_frame->size - current_frame->len));
358 // Align end to 32 bytes.
359 limit = (const uint8_t *)(intptr_t(limit) & ~31);
361 if (start >= limit) {
365 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
366 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
367 if (aligned_start != start) {
// The unaligned head is searched with memchr and appended via add_to_frame(),
// either in full or only up to the first sync_char.
368 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
369 if (sync_start == nullptr) {
370 add_to_frame(current_frame, "", start, aligned_start);
372 add_to_frame(current_frame, "", start, sync_start);
377 // Make the length a multiple of 64.
// Needed because the interleaved loops below consume two vectors per iteration.
378 if (current_frame->interleaved) {
379 if (((limit - aligned_start) % 64) != 0) {
382 assert(((limit - aligned_start) % 64) == 0);
// ---- AVX2 variant ----
386 const __m256i needle = _mm256_set1_epi8(sync_char);
388 const __restrict __m256i *in = (const __m256i *)aligned_start;
389 if (current_frame->interleaved) {
// Even-offset input bytes go to out1 (data plane), odd-offset bytes to out2 (data2 plane).
390 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
391 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
392 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: gather even-index bytes into the low half and odd-index bytes
// into the high half.
396 __m256i shuffle_cw = _mm256_set_epi8(
397 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
398 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
399 while (in < (const __m256i *)limit) {
400 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
401 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
402 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
404 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
405 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
406 __m256i found = _mm256_or_si256(found1, found2);
408 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
409 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
411 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
412 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo collects the low 128-bit halves of data1/data2, hi the high halves.
414 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
415 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
417 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
418 _mm256_storeu_si256(out2, hi);
// Any byte equal to sync_char in either vector triggers this branch (body not visible).
420 if (!_mm256_testz_si256(found, found)) {
428 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copies with the same sync_char test.
430 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
431 while (in < (const __m256i *)limit) {
432 __m256i data = _mm256_load_si256(in);
433 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
434 __m256i found = _mm256_cmpeq_epi8(data, needle);
435 if (!_mm256_testz_si256(found, found)) {
442 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- SSE variant (same structure with 128-bit vectors) ----
445 const __m128i needle = _mm_set1_epi8(sync_char);
447 const __m128i *in = (const __m128i *)aligned_start;
448 if (current_frame->interleaved) {
449 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
450 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
451 if (current_frame->len % 2 == 1) {
// Within each 16-bit element, the low byte is the even-offset input byte and the
// high byte is the odd-offset one; each set is packed back down to 8 bits.
455 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
456 while (in < (const __m128i *)limit) {
457 __m128i data1 = _mm_load_si128(in);
458 __m128i data2 = _mm_load_si128(in + 1);
459 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
460 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
461 __m128i data1_hi = _mm_srli_epi16(data1, 8);
462 __m128i data2_hi = _mm_srli_epi16(data2, 8);
463 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
464 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
465 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
466 _mm_storeu_si128(out2, hi);
467 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
468 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
469 if (!_mm_testz_si128(found1, found1) ||
470 !_mm_testz_si128(found2, found2)) {
478 current_frame->len += (uint8_t *)in - aligned_start;
480 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
481 while (in < (const __m128i *)limit) {
482 __m128i data = _mm_load_si128(in);
483 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
484 __m128i found = _mm_cmpeq_epi8(data, needle);
485 if (!_mm_testz_si128(found, found)) {
492 current_frame->len = (uint8_t *)out - current_frame->data;
496 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
498 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload to
// *current_frame and invokes start_callback each time sync_pattern is found.
//   xfr             - the completed transfer
//   sync_pattern    - byte sequence marking the start of a new frame/block
//   current_frame   - frame currently being filled
//   frame_type_name - label forwarded to add_to_frame() for its messages
//   start_callback  - called with a pointer just past each sync marker found
// (The sync-length parameter used below is declared on a line not visible here.)
502 void decode_packs(const libusb_transfer *xfr,
503 const char *sync_pattern,
505 FrameAllocator::Frame *current_frame,
506 const char *frame_type_name,
507 function<void(const uint8_t *start)> start_callback)
510 for (int i = 0; i < xfr->num_iso_packets; i++) {
511 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete are reported on stderr.
513 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
514 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
519 const uint8_t *start = xfr->buffer + offset;
520 const uint8_t *limit = start + pack->actual_length;
521 while (start < limit) { // Usually runs only one iteration.
// First let the SIMD fast path consume as much as it can, keyed on the first
// byte of the sync pattern.
523 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
524 if (start == limit) break;
525 assert(start < limit);
// Slow path: exact search for the whole sync pattern in what is left.
528 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
529 if (start_next_frame == nullptr) {
530 // add the rest of the buffer
531 add_to_frame(current_frame, frame_type_name, start, limit);
534 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
535 start = start_next_frame + sync_length; // skip sync
// NOTE(review): `start` may equal `limit` here, yet the callbacks bound in
// cb_xfr() read start[0..3] -- confirm the device guarantees trailing bytes.
536 start_callback(start);
// Debug dump; presumably conditionally compiled (surrounding lines not visible).
540 dump_pack(xfr, offset, pack);
// The buffer offset advances by the packet's allotted length, not by actual_length.
542 offset += pack->length;
// libusb completion callback shared by all transfers set up in configure_card().
// Isochronous transfers are decoded into audio/video frames; control transfers are
// treated as register reads. A final libusb_submit_transfer() call re-queues the transfer.
// xfr->user_data must be the owning BMUSBCapture (asserted below).
546 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A transfer that did not complete is reported and freed.
548 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
549 fprintf(stderr, "transfer status %d\n", xfr->status);
550 libusb_free_transfer(xfr);
554 assert(xfr->user_data != nullptr);
555 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
557 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio (20-byte text marker); the other decode_packs() call
// handles video with the 4-byte marker 00 00 ff ff.
558 if (xfr->endpoint == 0x84) {
559 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
561 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
564 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
565 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
566 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is used below although its declaration above is commented
// out; these lines are presumably compiled out (guard not visible) -- confirm.
568 if (setup->wIndex == 44) {
569 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
571 printf("read register %2d: 0x%02x%02x%02x%02x\n",
572 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the 4 bytes just read at the current register offset, then step to the
// next register, wrapping at NUM_BMUSB_REGISTERS.
575 memcpy(usb->register_file + usb->current_register, buf, 4);
576 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
577 if (usb->current_register == 0) {
578 // read through all of them
579 printf("register dump:");
580 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
581 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-target the same transfer buffer at the next register (request 214, 4 bytes).
585 libusb_fill_control_setup(xfr->buffer,
586 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
587 /*index=*/usb->current_register, /*length=*/4);
592 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
593 for (i = 0; i < xfr->actual_length; i++) {
594 printf("%02x", xfr->buffer[i]);
604 if (libusb_submit_transfer(xfr) < 0) {
605 fprintf(stderr, "error re-submitting URB\n");
// USB event loop: requests SCHED_RR scheduling at priority 1 for the calling thread
// (a failure is only reported), then calls libusb_handle_events() on the default
// context until should_quit becomes true.
610 void BMUSBCapture::usb_thread_func()
// NOTE(review): "¶m" in the two lines below looks like a mis-decoded "&param"
// (HTML entity damage in this copy of the file) -- verify against the real source.
613 memset(¶m, 0, sizeof(param));
614 param.sched_priority = 1;
615 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
616 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
618 while (!should_quit) {
619 int rc = libusb_handle_events(nullptr);
// The action taken on a non-success result is on a line not visible in this view.
620 if (rc != LIBUSB_SUCCESS)
// One-time device setup. In order of the visible statements:
//   1. install default MallocFrameAllocators if none were set, and start the
//      detached dequeue thread;
//   2. initialise libusb and open the device by vid/pid;
//   3. print the configuration's interfaces, alternate settings and endpoints;
//   4. select configuration 1, claim interface 0, switch alternate settings,
//      claim interface 3;
//   5. run the synchronous control transfers listed in ctrls[];
//   6. submit asynchronous register reads (registers 44 and 24);
//   7. open "audio.raw" and allocate the isochronous transfers for endpoints 3 and 4,
//      storing them in iso_xfrs for start_bm_capture() to submit.
// Error checks print a message; what follows each message is on lines not visible here.
625 void BMUSBCapture::configure_card()
627 if (video_frame_allocator == nullptr) {
628 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
630 if (audio_frame_allocator == nullptr) {
631 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
// The thread is detached, so it keeps using `this` with no join point.
633 thread(&BMUSBCapture::dequeue_thread, this).detach();
636 struct libusb_transfer *xfr;
638 rc = libusb_init(nullptr);
640 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
644 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
645 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
646 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
648 fprintf(stderr, "Error finding USB device\n");
// Informational dump of the descriptor tree for configuration index 0.
652 libusb_config_descriptor *config;
653 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
655 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
658 printf("%d interface\n", config->bNumInterfaces);
659 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
660 printf(" interface %d\n", interface_number);
661 const libusb_interface *interface = &config->interface[interface_number];
662 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
663 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
664 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
665 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
666 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
667 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
672 rc = libusb_set_configuration(devh, /*configuration=*/1);
674 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
678 rc = libusb_claim_interface(devh, 0);
680 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
684 // Alternate setting 1 is output, alternate setting 2 is input.
685 // Card is reset when switching alternates, so the driver uses
686 // this “double switch” when it wants to reset.
688 // There's also alternate settings 3 and 4, which seem to be
689 // like 1 and 2 except they advertise less bandwidth needed.
690 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
692 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
695 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
// NOTE(review): the message below says "alternate 1" although the call above
// selects alternate setting 2.
697 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// Whether this third switch is compiled in cannot be determined from this view.
701 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
703 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
709 rc = libusb_claim_interface(devh, 3);
711 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
717 // 44 is some kind of timer register (first 16 bits count upwards)
718 // 24 is some sort of watchdog?
719 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
720 // (or will go to 0x73c60010?), also seen 0x73c60100
721 // 12 also changes all the time, unclear why
722 // 16 seems to be autodetected mode somehow
723 // -- this is e00115e0 after reset?
724 // ed0115e0 after mode change [to output?]
725 // 2d0015e0 after more mode change [to input]
726 // ed0115e0 after more mode change
727 // 2d0015e0 after more mode change
729 // 390115e0 seems to indicate we have signal
730 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
732 // 200015e0 on startup
733 // changes to 250115e0 when we sync to the signal
735 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
737 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
739 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
740 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
742 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
743 // perhaps some of them are related to analog output?
745 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
746 // but the driver sets it to 0x8036802a at some point.
748 // all of this is on request 214/215. other requests (192, 219,
749 // 222, 223, 224) are used for firmware upgrade. Probably best to
750 // stay out of it unless you know what you're doing.
754 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
757 // 0x01 - stable signal
759 // 0x08 - unknown (audio??)
// Table of synchronous control transfers: direction, request, register index, value.
// Request 214 is used with LIBUSB_ENDPOINT_IN (read), 215 with LIBUSB_ENDPOINT_OUT (write).
769 static const ctrl ctrls[] = {
770 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
771 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
773 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
775 // clearing the 0x08000000 bit seems to change the capture format (other source?)
776 // 0x10000000 = analog audio instead of embedded audio, it seems
777 // 0x3a000000 = component video? (analog audio)
778 // 0x3c000000 = composite video? (analog audio)
779 // 0x3e000000 = s-video? (analog audio)
780 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
781 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
782 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
783 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
784 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
787 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The 32-bit value is converted to network (big-endian) byte order before it is
// copied into the 4-byte transfer buffer.
788 uint32_t flipped = htonl(ctrls[req].data);
789 static uint8_t value[4];
790 memcpy(value, &flipped, sizeof(flipped));
791 int size = sizeof(value);
792 //if (ctrls[req].request == 215) size = 0;
// A timeout of 0 means no timeout in libusb's synchronous API.
793 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
794 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
796 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// rc is used as the number of bytes to print from value.
800 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
801 for (int i = 0; i < rc; ++i) {
802 printf("%02x", value[i]);
// Synchronous read of register my_index; how this section is enclosed (loop or
// conditional compilation) is not visible in this view.
810 static int my_index = 0;
811 static uint8_t value[4];
812 int size = sizeof(value);
813 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
814 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
816 fprintf(stderr, "Error on control\n");
819 printf("rc=%d index=%d: 0x", rc, my_index);
820 for (int i = 0; i < rc; ++i) {
821 printf("%02x", value[i]);
828 // set up an asynchronous transfer of the timer register
// The command buffers below are function-local statics, so they outlive this call
// (cb_xfr() re-submits the same transfer) but are shared by every BMUSBCapture instance.
829 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
830 static int completed = 0;
832 xfr = libusb_alloc_transfer(0);
833 libusb_fill_control_setup(cmdbuf,
834 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
835 /*index=*/44, /*length=*/4);
// &completed is passed as user_data here and immediately replaced by `this`,
// which is what cb_xfr() expects.
836 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
837 xfr->user_data = this;
838 libusb_submit_transfer(xfr);
840 // set up an asynchronous transfer of register 24
841 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
842 static int completed2 = 0;
844 xfr = libusb_alloc_transfer(0);
845 libusb_fill_control_setup(cmdbuf2,
846 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
847 /*index=*/24, /*length=*/4);
848 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
849 xfr->user_data = this;
850 libusb_submit_transfer(xfr);
853 // set up an asynchronous transfer of the register dump
854 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
855 static int completed3 = 0;
857 xfr = libusb_alloc_transfer(0);
858 libusb_fill_control_setup(cmdbuf3,
859 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
860 /*index=*/current_register, /*length=*/4);
861 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
862 xfr->user_data = this;
// The register-dump transfer is prepared but its submission is commented out.
863 //libusb_submit_transfer(xfr);
// Destination used by dump_audio_block(); the fopen() result is not checked in the visible lines.
865 audiofp = fopen("audio.raw", "wb");
867 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4 (OR-ed with LIBUSB_ENDPOINT_IN below); six transfers each.
868 for (int e = 3; e <= 4; ++e) {
869 //int num_transfers = (e == 3) ? 6 : 6;
870 int num_transfers = 6;
871 for (int i = 0; i < num_transfers; ++i) {
872 int num_iso_pack, size;
874 // Video seems to require isochronous packets scaled with the width;
875 // seemingly six lines is about right, rounded up to the required 1kB
877 size = WIDTH * 2 * 6;
878 // Note that for 10-bit input, you'll need to increase size accordingly.
879 //size = size * 4 / 3;
880 if (size % 1024 != 0) {
884 num_iso_pack = (2 << 18) / size; // 512 kB.
885 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// The transfer buffer is heap-allocated; no matching delete[] is visible in this function.
890 int num_bytes = num_iso_pack * size;
891 uint8_t *buf = new uint8_t[num_bytes];
893 xfr = libusb_alloc_transfer(num_iso_pack);
895 fprintf(stderr, "oom\n");
899 int ep = LIBUSB_ENDPOINT_IN | e;
900 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
901 num_iso_pack, cb_xfr, nullptr, 0);
902 libusb_set_iso_packet_lengths(xfr, size);
903 xfr->user_data = this;
// Kept for start_bm_capture(), which performs the actual submission.
904 iso_xfrs.push_back(xfr);
// Starts streaming: submits every isochronous transfer that configure_card()
// stored in iso_xfrs, reporting any submission error on stderr.
909 void BMUSBCapture::start_bm_capture()
911 printf("starting capture\n");
913 for (libusb_transfer *xfr : iso_xfrs) {
914 printf("submitting transfer...\n");
915 int rc = libusb_submit_transfer(xfr);
918 //printf("num_bytes=%d\n", num_bytes);
// NOTE(review): `i` is not declared in any visible line of this function;
// presumably a counter declared on an elided line -- confirm.
919 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
920 xfr->endpoint, i, libusb_error_name(rc));
// Teardown calls; whether they are reachable or compiled out cannot be determined
// from this view (`devh` is a local of configure_card() in the visible code).
927 libusb_release_interface(devh, 0);
931 libusb_exit(nullptr);
// Launches the USB event-handling thread and stores it in usb_thread.
// NOTE(review): the thread is constructed from &BMUSBCapture::usb_thread_func with no
// object argument, which only works if usb_thread_func is a static member -- confirm
// against the class declaration.
936 void BMUSBCapture::start_bm_thread()
939 usb_thread = thread(&BMUSBCapture::usb_thread_func);
942 void BMUSBCapture::stop_bm_thread()