1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <arpa/inet.h>
17 #include <immintrin.h>
25 #include <condition_variable>
32 using namespace std::placeholders;
35 #define HEIGHT 750 /* 30 lines ancillary data? */
37 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Number of bytes at the start of each video frame buffer that are skipped
// before the payload (dump_frame() skips them; dequeue_thread() passes the
// value to frame_callback as the video offset).
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
// Same idea for audio blocks: bytes skipped at the start of each block.
40 #define AUDIO_HEADER_SIZE 4
42 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
43 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Fixed buffer size (8 << 20 bytes = 8 MiB) used for the default video
// frame allocator, instead of the format-dependent formulas above.
44 #define FRAME_SIZE (8 << 20)
// Polled by the USB event loop in usb_thread_func(); the loop exits once it is true.
49 atomic<bool> should_quit;
// Out-of-line, empty destructor for the FrameAllocator base class.
51 FrameAllocator::~FrameAllocator() {}
// Number of buffers each MallocFrameAllocator preallocates in its constructor.
53 #define NUM_QUEUED_FRAMES 8
// FrameAllocator implementation backed by a fixed pool of heap buffers.
// The constructor preallocates NUM_QUEUED_FRAMES buffers of frame_size bytes;
// alloc_frame() takes one from the pool and release_frame() puts it back.
54 class MallocFrameAllocator : public FrameAllocator {
// frame_size: size in bytes of every buffer in the pool.
56 MallocFrameAllocator(size_t frame_size);
// Hands out a buffer from the freelist (logs a frame overrun if none is left).
57 Frame alloc_frame() override;
// Puts frame.data back on the freelist.
58 void release_frame(Frame frame) override;
// Currently unused buffers; the methods access it under freelist_mutex.
64 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
// Stores the per-buffer size and fills the freelist with NUM_QUEUED_FRAMES
// freshly allocated buffers of that size.
67 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
68 : frame_size(frame_size)
70 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
71 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Takes one buffer out of the pool and returns it wrapped in a Frame.
// The freelist is accessed under freelist_mutex. When the pool is empty an
// overrun message is printed; presumably the returned frame then has no data
// pointer (add_to_frame() checks for data == nullptr) -- confirm against the
// lines not shown here.
75 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
80 unique_lock<mutex> lock(freelist_mutex); // Meh.
81 if (freelist.empty()) {
82 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership of the buffer moves from the unique_ptr on top of the stack to
// the frame; the now-empty unique_ptr is then popped.
85 vf.data = freelist.top().release();
87 freelist.pop(); // Meh.
// Returns a frame buffer to the pool: takes freelist_mutex and pushes
// frame.data back onto the freelist, which takes ownership of it again.
92 void MallocFrameAllocator::release_frame(Frame frame)
94 unique_lock<mutex> lock(freelist_mutex);
95 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Ordering test for 16-bit counters that wrap around. In the visible
// branches, b counts as coming after a when the forward distance from a to b
// is below 0x8000 (half the 16-bit range); the second branch adds 0x10000 to
// b first, i.e. it treats b as having wrapped past 0xffff.
// NOTE(review): the conditions selecting each branch are not visible here.
98 bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
103 return (b - a < 0x8000);
// Unwrap b by one full period before measuring the distance.
105 int wrap_b = 0x10000 + int(b);
106 return (wrap_b - a < 0x8000);
// Appends a finished frame, tagged with its timecode, to the queue q.
// If q is non-empty and the new timecode does not come after the last queued
// one (per uint16_less_than_with_wraparound), the frame is reported as going
// backwards and released to its owning allocator instead. The push itself
// happens under queue_lock, after which the dequeue thread is notified.
110 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
112 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
113 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
114 q->back().timecode, timecode);
115 frame.owner->release_frame(frame);
121 qf.timecode = timecode;
125 unique_lock<mutex> lock(queue_lock);
126 q->push_back(move(qf));
// The waiter in dequeue_thread() needs both queues non-empty, so this wakeup
// may find nothing to do; its wait predicate re-checks.
128 queues_not_empty.notify_one(); // might be spurious
// Writes one frame payload to the file named filename (opened in binary write
// mode), skipping the first HEADER_SIZE bytes of the buffer. Prints a message
// when fwrite does not write the whole payload.
// NOTE(review): the fopen result is not checked in the visible lines, and
// frame_len - HEADER_SIZE assumes frame_len >= HEADER_SIZE.
131 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
133 FILE *fp = fopen(filename, "wb");
134 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
135 printf("short write!\n");
// Appends one audio block to the audiofp stream, skipping the first
// AUDIO_HEADER_SIZE bytes of the block. The fwrite result is ignored.
140 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
142 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Consumer thread body: pairs queued video and audio blocks by timecode.
// Waits on queues_not_empty until both pending queues hold at least one
// entry, then compares the timecodes at the two queue heads. An unmatched
// head is logged, released back to its allocator and popped. A matching pair
// is popped from both queues; the visible code then passes it to
// dump_frame()/dump_audio_block() and to frame_callback.
145 void BMUSBCapture::dequeue_thread()
148 unique_lock<mutex> lock(queue_lock);
149 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
151 uint16_t video_timecode = pending_video_frames.front().timecode;
152 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// Timecodes are 16-bit counters that wrap, so order them with the same
// wraparound-aware helper that queue_frame() uses. A plain < would drop the
// wrong stream whenever the two heads straddle the 0xffff -> 0x0000 wrap.
// Equality is tested explicitly so that the matching branch is taken exactly
// when the two timecodes are identical.
153 if (video_timecode != audio_timecode && uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
154 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
156 video_frame_allocator->release_frame(pending_video_frames.front().frame);
157 pending_video_frames.pop_front();
158 } else if (audio_timecode != video_timecode) {
159 printf("Audio block 0x%04x without corresponding video block, dropping.\n",
161 audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
162 pending_audio_frames.pop_front();
// Matching pair: copy both entries out before popping them.
164 QueuedFrame video_frame = pending_video_frames.front();
165 QueuedFrame audio_frame = pending_audio_frames.front();
166 pending_audio_frames.pop_front();
167 pending_video_frames.pop_front();
// The dump file name is built from the format and timecode as hex.
172 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
173 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
174 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// Hand both frames to the user callback together with the header sizes it
// has to skip in each buffer.
177 frame_callback(video_timecode,
178 video_frame.frame, HEADER_SIZE, video_frame.format,
179 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Callback for decode_packs() on the video stream; start points just past a
// video sync marker. Reads two little-endian 16-bit values from there
// (timecode from bytes 0-1, format from bytes 2-3). If the frame collected so
// far is non-empty it is queued on pending_video_frames tagged with those
// values, and a fresh buffer is then taken from the video frame allocator.
184 void BMUSBCapture::start_new_frame(const uint8_t *start)
186 uint16_t format = (start[3] << 8) | start[2];
187 uint16_t timecode = (start[1] << 8) | start[0];
189 if (current_video_frame.len > 0) {
191 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
193 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
195 // //start[7], start[6], start[5], start[4],
196 // read_current_frame, FRAME_SIZE);
198 current_video_frame = video_frame_allocator->alloc_frame();
199 //if (current_video_frame.data == nullptr) {
200 // read_current_frame = -1;
202 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): start points just past an audio
// sync marker. Reads the little-endian 16-bit timecode (bytes 0-1) and format
// (bytes 2-3), queues the audio block collected so far on
// pending_audio_frames if it is non-empty, and then takes a fresh buffer from
// the audio frame allocator.
206 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
208 uint16_t format = (start[3] << 8) | start[2];
209 uint16_t timecode = (start[1] << 8) | start[0];
210 if (current_audio_frame.len > 0) {
211 //dump_audio_block();
212 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
214 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
215 // format, timecode, read_current_audio_block);
216 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as
// two-digit hex. offset is the position of the packet inside xfr->buffer and
// only pack->actual_length bytes are printed. The j % 8 test presumably
// inserts separators between groups of bytes (the printed text is not
// visible here).
220 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
222 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
223 for (unsigned j = 0; j < pack->actual_length; j++) {
224 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
225 printf("%02x", xfr->buffer[j + offset]);
228 else if ((j % 8) == 7)
// Copies n bytes from src into two destinations, consuming the source two
// bytes per loop iteration. Presumably the first byte of each pair goes to
// dest1 and the second to dest2 (the loop body is not visible here), and n is
// expected to be even -- confirm against the full source.
236 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
239 uint8_t *dptr1 = dest1;
240 uint8_t *dptr2 = dest2;
242 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to current_frame and advances its len.
// Frames without a buffer, or whose len already exceeds size, are handled by
// the guard at the top (presumably the data is discarded -- the body of the
// guard is not visible). If the new bytes would not fit, an overflow message
// naming frame_type_name is printed.
// For interleaved frames the input is split between data and data2 via
// memcpy_interleaved(); otherwise it is copied linearly into data.
248 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
250 if (current_frame->data == nullptr ||
251 current_frame->len > current_frame->size ||
256 int bytes = end - start;
257 if (current_frame->len + bytes > current_frame->size) {
258 printf("%d bytes overflow after last %s frame\n",
259 int(current_frame->len + bytes - current_frame->size), frame_type_name);
262 if (current_frame->interleaved) {
// Each plane holds half of the bytes written so far.
263 uint8_t *data = current_frame->data + current_frame->len / 2;
264 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length / odd byte count need single-byte handling before the
// pairwise copy (details are in lines not visible here).
265 if (current_frame->len % 2 == 1) {
269 if (bytes % 2 == 1) {
272 ++current_frame->len;
275 memcpy_interleaved(data, data2, start, bytes);
276 current_frame->len += bytes;
// Non-interleaved: plain append.
278 memcpy(current_frame->data + current_frame->len, start, bytes);
279 current_frame->len += bytes;
// Debug helper: prints name (left-justified, padded to 10 columns, followed
// by a colon) and then all 32 bytes of the 256-bit register n as two-digit
// hex values, byte 0 first. The extractions are written out one by one
// because _mm256_extract_epi8 takes its index as a constant.
287 void avx2_dump(const char *name, __m256i n)
289 printf("%-10s:", name);
// Bytes 0-7.
290 printf(" %02x", _mm256_extract_epi8(n, 0));
291 printf(" %02x", _mm256_extract_epi8(n, 1));
292 printf(" %02x", _mm256_extract_epi8(n, 2));
293 printf(" %02x", _mm256_extract_epi8(n, 3));
294 printf(" %02x", _mm256_extract_epi8(n, 4));
295 printf(" %02x", _mm256_extract_epi8(n, 5));
296 printf(" %02x", _mm256_extract_epi8(n, 6));
297 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
299 printf(" %02x", _mm256_extract_epi8(n, 8));
300 printf(" %02x", _mm256_extract_epi8(n, 9));
301 printf(" %02x", _mm256_extract_epi8(n, 10));
302 printf(" %02x", _mm256_extract_epi8(n, 11));
303 printf(" %02x", _mm256_extract_epi8(n, 12));
304 printf(" %02x", _mm256_extract_epi8(n, 13));
305 printf(" %02x", _mm256_extract_epi8(n, 14));
306 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
308 printf(" %02x", _mm256_extract_epi8(n, 16));
309 printf(" %02x", _mm256_extract_epi8(n, 17));
310 printf(" %02x", _mm256_extract_epi8(n, 18));
311 printf(" %02x", _mm256_extract_epi8(n, 19));
312 printf(" %02x", _mm256_extract_epi8(n, 20));
313 printf(" %02x", _mm256_extract_epi8(n, 21));
314 printf(" %02x", _mm256_extract_epi8(n, 22));
315 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
317 printf(" %02x", _mm256_extract_epi8(n, 24));
318 printf(" %02x", _mm256_extract_epi8(n, 25));
319 printf(" %02x", _mm256_extract_epi8(n, 26));
320 printf(" %02x", _mm256_extract_epi8(n, 27));
321 printf(" %02x", _mm256_extract_epi8(n, 28));
322 printf(" %02x", _mm256_extract_epi8(n, 29));
323 printf(" %02x", _mm256_extract_epi8(n, 30));
324 printf(" %02x", _mm256_extract_epi8(n, 31));
329 // Does a memcpy and memchr in one to reduce processing time.
330 // Note that the benefit is somewhat limited if your L3 cache is small,
331 // as you'll (unfortunately) spend most of the time loading the data
334 // Complicated cases are left to the slow path; it basically stops copying
335 // up until the first instance of "sync_char" (usually a bit before, actually).
336 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
337 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame - frame being filled; its len is advanced by the bytes copied.
//   start, limit  - input range [start, limit) to consume.
//   sync_char     - byte value whose presence ends the fast copy.
// Returns a pointer into the input marking how far this function got; the
// caller (decode_packs) continues from there with the slow path.
// There is a 256-bit (__m256i) and a 128-bit (__m128i) variant of the copy
// loops below; presumably one is chosen at compile time (the selecting lines
// are not visible here).
338 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
340 if (current_frame->data == nullptr ||
341 current_frame->len > current_frame->size ||
// Short inputs get special treatment (presumably left entirely to the slow path).
345 size_t orig_bytes = limit - start;
346 if (orig_bytes < 128) {
351 // Don't read more bytes than we can write.
352 limit = min(limit, start + (current_frame->size - current_frame->len));
354 // Align end to 32 bytes.
355 limit = (const uint8_t *)(intptr_t(limit) & ~31);
357 if (start >= limit) {
361 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
362 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
363 if (aligned_start != start) {
// The unaligned head is copied with the generic add_to_frame(); if it already
// contains sync_char, only the bytes before it are copied.
364 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
365 if (sync_start == nullptr) {
366 add_to_frame(current_frame, "", start, aligned_start);
368 add_to_frame(current_frame, "", start, sync_start);
373 // Make the length a multiple of 64.
374 if (current_frame->interleaved) {
375 if (((limit - aligned_start) % 64) != 0) {
378 assert(((limit - aligned_start) % 64) == 0);
// ---- 256-bit variant ----
382 const __m256i needle = _mm256_set1_epi8(sync_char);
384 const __restrict __m256i *in = (const __m256i *)aligned_start;
385 if (current_frame->interleaved) {
// Output positions in the two planes for the bytes written so far.
386 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
387 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
388 if (current_frame->len % 2 == 1) {
// Per 128-bit lane: gather the even-indexed bytes into the low half and the
// odd-indexed bytes into the high half.
392 __m256i shuffle_cw = _mm256_set_epi8(
393 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
394 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
395 while (in < (const __m256i *)limit) {
396 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
397 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
398 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Nonzero bytes in found mark positions equal to sync_char in either input register.
400 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
401 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
402 __m256i found = _mm256_or_si256(found1, found2);
404 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
405 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
407 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
408 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo takes the low 128 bits of data1 and data2, hi takes the high 128 bits.
410 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
411 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
413 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
414 _mm256_storeu_si256(out2, hi);
// A hit in this 64-byte chunk ends the fast path.
416 if (!_mm256_testz_si256(found, found)) {
424 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 32-byte copy with the same sync_char check.
426 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
427 while (in < (const __m256i *)limit) {
428 __m256i data = _mm256_load_si256(in);
429 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
430 __m256i found = _mm256_cmpeq_epi8(data, needle);
431 if (!_mm256_testz_si256(found, found)) {
438 current_frame->len = (uint8_t *)out - current_frame->data;
// ---- 128-bit variant ----
441 const __m128i needle = _mm_set1_epi8(sync_char);
443 const __m128i *in = (const __m128i *)aligned_start;
444 if (current_frame->interleaved) {
445 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
446 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
447 if (current_frame->len % 2 == 1) {
// Split each 16-bit pair into its low byte (mask) and high byte (shift), then
// pack the halves of both input registers into one output register each.
451 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
452 while (in < (const __m128i *)limit) {
453 __m128i data1 = _mm_load_si128(in);
454 __m128i data2 = _mm_load_si128(in + 1);
455 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
456 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
457 __m128i data1_hi = _mm_srli_epi16(data1, 8);
458 __m128i data2_hi = _mm_srli_epi16(data2, 8);
459 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
460 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
461 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
462 _mm_storeu_si128(out2, hi);
463 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
464 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
465 if (!_mm_testz_si128(found1, found1) ||
466 !_mm_testz_si128(found2, found2)) {
474 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved: straight 16-byte copy with the same sync_char check.
476 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
477 while (in < (const __m128i *)limit) {
478 __m128i data = _mm_load_si128(in);
479 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
480 __m128i found = _mm_cmpeq_epi8(data, needle);
481 if (!_mm_testz_si128(found, found)) {
488 current_frame->len = (uint8_t *)out - current_frame->data;
492 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
// Tell the caller where the fast path stopped.
494 return (const uint8_t *)in;
// Walks all isochronous packets of a completed transfer and feeds their
// payload into current_frame, splitting the stream at every occurrence of
// sync_pattern (sync_length bytes long).
//   xfr             - completed libusb transfer holding the packets.
//   sync_pattern    - byte sequence that marks the start of a new frame/block.
//   current_frame   - frame currently being filled.
//   frame_type_name - label used in diagnostics from add_to_frame().
//   start_callback  - invoked with a pointer just past each sync pattern found.
// Packets whose status is not LIBUSB_TRANSFER_COMPLETED are reported on stderr.
498 void decode_packs(const libusb_transfer *xfr,
499 const char *sync_pattern,
501 FrameAllocator::Frame *current_frame,
502 const char *frame_type_name,
503 function<void(const uint8_t *start)> start_callback)
506 for (int i = 0; i < xfr->num_iso_packets; i++) {
507 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
509 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
510 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of the packet slot contain received data.
515 const uint8_t *start = xfr->buffer + offset;
516 const uint8_t *limit = start + pack->actual_length;
517 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy as much as possible; the fast path stops at or before the first
// byte equal to the first byte of the sync pattern.
519 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
520 if (start == limit) break;
521 assert(start < limit);
// Slow path: look for the complete sync pattern in what is left.
524 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
525 if (start_next_frame == nullptr) {
526 // add the rest of the buffer
527 add_to_frame(current_frame, frame_type_name, start, limit);
// Sync found: finish the current frame with the bytes before the marker, then
// let the callback start the next one from the bytes after it.
530 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
531 start = start_next_frame + sync_length; // skip sync
532 start_callback(start);
536 dump_pack(xfr, offset, pack);
// Packet slots in the buffer are spaced by the nominal packet length, not by
// the number of bytes actually received.
538 offset += pack->length;
// libusb completion callback shared by all transfers. xfr->user_data must
// point to the owning BMUSBCapture.
// - A transfer that did not complete is reported on stderr and freed.
// - Isochronous transfers are decoded with decode_packs(): endpoint 0x84
//   carries audio (sync pattern DeckLinkAudioResyncT, 20 bytes); the other
//   branch handles video (sync pattern 00 00 ff ff).
// - Control transfers carry a 4-byte register read, which is stored in
//   register_file; the setup packet is then refilled to read the next register.
// At the end the transfer is submitted again.
542 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
544 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
545 fprintf(stderr, "transfer status %d\n", xfr->status);
546 libusb_free_transfer(xfr);
550 assert(xfr->user_data != nullptr);
551 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
553 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
554 if (xfr->endpoint == 0x84) {
555 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
557 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
560 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
561 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
562 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): the declaration of setup above is commented out, so the
// printing code below presumably sits in a disabled region -- confirm.
564 if (setup->wIndex == 44) {
565 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
567 printf("read register %2d: 0x%02x%02x%02x%02x\n",
568 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the 4 bytes just read and advance to the next register, wrapping
// around at NUM_BMUSB_REGISTERS.
571 memcpy(usb->register_file + usb->current_register, buf, 4);
572 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
573 if (usb->current_register == 0) {
574 // read through all of them
575 printf("register dump:");
576 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
577 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Reuse the transfer buffer to request the next register.
581 libusb_fill_control_setup(xfr->buffer,
582 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
583 /*index=*/usb->current_register, /*length=*/4);
588 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
589 for (i = 0; i < xfr->actual_length; i++) {
590 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so the stream keeps flowing.
600 if (libusb_submit_transfer(xfr) < 0) {
601 fprintf(stderr, "error re-submitting URB\n");
// Body of the USB event thread. Tries to switch the calling thread to the
// SCHED_RR realtime policy at priority 1 (a failure is only reported, not
// fatal) and then runs the libusb event loop until should_quit becomes true.
606 void BMUSBCapture::usb_thread_func()
608 printf("usb thread started\n");
// Zero the scheduling parameters before setting the one field that is used.
611 memset(&param, 0, sizeof(param));
612 param.sched_priority = 1;
// pid 0 means the calling thread.
613 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
614 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
// Each call dispatches pending transfer completions (i.e. cb_xfr) on the
// default libusb context.
616 while (!should_quit) {
617 int rc = libusb_handle_events(nullptr);
618 if (rc != LIBUSB_SUCCESS)
// One-time setup of the capture device. In order:
//  1. installs default MallocFrameAllocators for video and audio if the
//     caller has not set any, and starts the detached dequeue thread;
//  2. initializes libusb, opens the device by vid/pid and prints its
//     interface/endpoint layout;
//  3. selects configuration 1, claims interface 0 and cycles its alternate
//     settings 1 -> 2 -> 1 (which resets the card), then claims interface 3;
//  4. issues the initial vendor control requests from the ctrls table;
//  5. prepares asynchronous control transfers for register polling;
//  6. allocates the isochronous transfers for endpoints 3 and 4 and stores
//     them in iso_xfrs; they are submitted later by start_bm_capture().
// Failures are reported on stderr (the handling after each message is in
// lines not visible here).
623 void BMUSBCapture::configure_card()
625 if (video_frame_allocator == nullptr) {
626 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
628 if (audio_frame_allocator == nullptr) {
629 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
// The consumer thread runs for the rest of the process lifetime.
631 thread(&BMUSBCapture::dequeue_thread, this).detach();
634 struct libusb_transfer *xfr;
636 rc = libusb_init(nullptr);
638 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
642 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
643 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
644 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
646 fprintf(stderr, "Error finding USB device\n");
// Informational only: print every interface, alternate setting and endpoint
// of configuration index 0.
650 libusb_config_descriptor *config;
651 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
653 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
656 printf("%d interface\n", config->bNumInterfaces);
657 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
658 printf(" interface %d\n", interface_number);
659 const libusb_interface *interface = &config->interface[interface_number];
660 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
661 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
662 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
663 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
664 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
665 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
670 rc = libusb_set_configuration(devh, /*configuration=*/1);
672 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
676 rc = libusb_claim_interface(devh, 0);
678 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
682 // Alternate setting 1 is output, alternate setting 2 is input.
683 // Card is reset when switching alternates, so the driver uses
684 // this “double switch” when it wants to reset.
685 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
687 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
690 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
// This message used to say alternate 1 although the call above selects 2.
692 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
696 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
698 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
704 rc = libusb_claim_interface(devh, 3);
706 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
712 // 44 is some kind of timer register (first 16 bits count upwards)
713 // 24 is some sort of watchdog?
714 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
715 // (or will go to 0x73c60010?), also seen 0x73c60100
716 // 12 also changes all the time, unclear why
717 // 16 seems to be autodetected mode somehow
718 // -- this is e00115e0 after reset?
719 // ed0115e0 after mode change [to output?]
720 // 2d0015e0 after more mode change [to input]
721 // ed0115e0 after more mode change
722 // 2d0015e0 after more mode change
724 // 390115e0 seems to indicate we have signal
725 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
727 // 200015e0 on startup
728 // changes to 250115e0 when we sync to the signal
730 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
732 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
734 // 28 and 32 seem to be analog audio input levels (one byte for each of the eight channels).
735 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
737 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
738 // perhaps some of them are related to analog output?
740 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
741 // but the driver sets it to 0x8036802a at some point.
743 // all of this is on request 214/215. other requests (192, 219,
744 // 222, 223, 224) are used for firmware upgrade. Probably best to
745 // stay out of it unless you know what you're doing.
749 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
752 // 0x01 - stable signal
754 // 0x08 - unknown (audio??)
// Initial register traffic; each entry is {direction, request, index, data}.
// Request 214 reads a register, request 215 writes one.
764 static const ctrl ctrls[] = {
765 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
766 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
768 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
770 // clearing the 0x08000000 bit seems to change the capture format (other source?)
771 // 0x10000000 = analog audio instead of embedded audio, it seems
772 // 0x3a000000 = component video? (analog audio)
773 // 0x3c000000 = composite video? (analog audio)
774 // 0x3e000000 = s-video? (analog audio)
775 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
776 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
777 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
778 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
779 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
782 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// Register values go over the wire most significant byte first.
783 uint32_t flipped = htonl(ctrls[req].data);
784 static uint8_t value[4];
785 memcpy(value, &flipped, sizeof(flipped));
786 int size = sizeof(value);
787 //if (ctrls[req].request == 215) size = 0;
788 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
789 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
791 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
795 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
796 for (int i = 0; i < rc; ++i) {
797 printf("%02x", value[i]);
805 static int my_index = 0;
806 static uint8_t value[4];
807 int size = sizeof(value);
808 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
809 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
811 fprintf(stderr, "Error on control\n");
814 printf("rc=%d index=%d: 0x", rc, my_index);
815 for (int i = 0; i < rc; ++i) {
816 printf("%02x", value[i]);
// The setup buffers below are static because libusb keeps using them after
// this function has returned.
823 // set up an asynchronous transfer of the timer register
824 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
825 static int completed = 0;
827 xfr = libusb_alloc_transfer(0);
828 libusb_fill_control_setup(cmdbuf,
829 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
830 /*index=*/44, /*length=*/4);
831 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// cb_xfr expects user_data to be the BMUSBCapture, so override the pointer
// passed to the fill call above.
832 xfr->user_data = this;
833 libusb_submit_transfer(xfr);
835 // set up an asynchronous transfer of register 24
836 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
837 static int completed2 = 0;
839 xfr = libusb_alloc_transfer(0);
840 libusb_fill_control_setup(cmdbuf2,
841 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
842 /*index=*/24, /*length=*/4);
843 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
844 xfr->user_data = this;
845 libusb_submit_transfer(xfr);
848 // set up an asynchronous transfer of the register dump
849 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
850 static int completed3 = 0;
852 xfr = libusb_alloc_transfer(0);
853 libusb_fill_control_setup(cmdbuf3,
854 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
855 /*index=*/current_register, /*length=*/4);
856 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
857 xfr->user_data = this;
858 //libusb_submit_transfer(xfr);
860 audiofp = fopen("audio.raw", "wb");
862 // set up isochronous transfers for audio and video
863 for (int e = 3; e <= 4; ++e) {
864 //int num_transfers = (e == 3) ? 6 : 6;
865 int num_transfers = 6;
866 for (int i = 0; i < num_transfers; ++i) {
867 int num_iso_pack, size;
869 // Video seems to require isochronous packets scaled with the width;
870 // seemingly six lines is about right, rounded up to the required 1kB
872 size = WIDTH * 2 * 6;
873 // Note that for 10-bit input, you'll need to increase size accordingly.
874 //size = size * 4 / 3;
875 if (size % 1024 != 0) {
879 num_iso_pack = (2 << 18) / size; // 512 kB.
880 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
885 int num_bytes = num_iso_pack * size;
886 uint8_t *buf = new uint8_t[num_bytes];
888 xfr = libusb_alloc_transfer(num_iso_pack);
890 fprintf(stderr, "oom\n");
894 int ep = LIBUSB_ENDPOINT_IN | e;
895 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
896 num_iso_pack, cb_xfr, nullptr, 0);
897 libusb_set_iso_packet_lengths(xfr, size);
898 xfr->user_data = this;
// Kept for start_bm_capture(), which performs the actual submission.
899 iso_xfrs.push_back(xfr);
// Starts streaming: submits every isochronous transfer in iso_xfrs (prepared
// by configure_card()) to libusb. A failed submission is reported on stderr
// with the endpoint and the libusb error name.
// NOTE(review): the release/exit calls at the end are presumably in a
// disabled or error-only region (surrounding lines are not visible) -- confirm.
904 void BMUSBCapture::start_bm_capture()
906 printf("starting capture\n");
908 for (libusb_transfer *xfr : iso_xfrs) {
909 printf("submitting transfer...\n");
910 int rc = libusb_submit_transfer(xfr);
913 //printf("num_bytes=%d\n", num_bytes);
914 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
915 xfr->endpoint, i, libusb_error_name(rc));
922 libusb_release_interface(devh, 0);
926 libusb_exit(nullptr);
// Launches the USB event thread running usb_thread_func and keeps its handle
// in usb_thread.
// NOTE(review): no object is passed to the thread constructor, which only
// works if usb_thread_func is declared static -- confirm in the header.
931 void BMUSBCapture::start_bm_thread()
934 usb_thread = thread(&BMUSBCapture::usb_thread_func);
937 void BMUSBCapture::stop_bm_thread()