1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
10 #include <netinet/in.h>
17 #include <immintrin.h>
23 #include <condition_variable>
34 using namespace std::placeholders;
// Number of lines per captured frame; the original note suggests this count
// includes ancillary (non-picture) lines.
37 #define HEIGHT 750 /* 30 lines ancillary data? */
39 //#define HEIGHT 1125 /* ??? lines ancillary data? */
// Byte offset passed to frame_callback() for video data, and the number of
// leading bytes dump_frame() skips.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Byte offset passed to frame_callback() for audio data, and the number of
// leading bytes dump_audio_block() skips.
42 #define AUDIO_HEADER_SIZE 4
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210
// Buffer size (8 MiB) used for the default video frame allocator in configure_card().
46 #define FRAME_SIZE (8 << 20)
// Polled by usb_thread_func(); the USB event loop runs while this is false.
51 atomic<bool> should_quit;
// Out-of-line definition of the FrameAllocator destructor.
53 FrameAllocator::~FrameAllocator() {}
// Number of buffers MallocFrameAllocator allocates up front.
55 #define NUM_QUEUED_FRAMES 16
56 class MallocFrameAllocator : public FrameAllocator {
58 MallocFrameAllocator(size_t frame_size);
59 Frame alloc_frame() override;
60 void release_frame(Frame frame) override;
66 stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70 : frame_size(frame_size)
72 for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
82 unique_lock<mutex> lock(freelist_mutex); // Meh.
83 if (freelist.empty()) {
84 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
87 vf.data = freelist.top().release();
89 freelist.pop(); // Meh.
94 void MallocFrameAllocator::release_frame(Frame frame)
96 if (frame.overflow > 0) {
97 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
99 unique_lock<mutex> lock(freelist_mutex);
100 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if 16-bit counter value <a> comes strictly before <b>, treating
// the counter as circular: <b> is "after" <a> when it lies 1..0x7fff steps
// ahead of it modulo 0x10000. Equal values, and values exactly 0x8000 apart,
// compare as not-less.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Unsigned 16-bit subtraction yields the forward distance from a to b
	// modulo 0x10000, which covers both the a < b and the wrapped a > b case
	// without relying on int promotion.
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
115 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
117 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
118 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
119 q->back().timecode, timecode);
120 frame.owner->release_frame(frame);
126 qf.timecode = timecode;
130 unique_lock<mutex> lock(queue_lock);
131 q->push_back(move(qf));
133 queues_not_empty.notify_one(); // might be spurious
136 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
138 FILE *fp = fopen(filename, "wb");
139 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
140 printf("short write!\n");
145 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
147 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up queued video and audio frames by
// timecode and hands them to frame_callback().
// Runs the optional init/cleanup callbacks at thread start and end, and loops
// until dequeue_thread_should_quit is set.
150 void BMUSBCapture::dequeue_thread_func()
152 if (has_dequeue_callbacks) {
153 dequeue_init_callback();
155 while (!dequeue_thread_should_quit) {
156 unique_lock<mutex> lock(queue_lock);
// Sleep until both queues have at least one frame, or until shutdown is requested.
157 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
// NOTE(review): the wait also returns on shutdown with the queues possibly
// empty; front() below is only valid if a quit check precedes it — confirm.
159 uint16_t video_timecode = pending_video_frames.front().timecode;
160 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// NOTE(review): plain '<' does not handle 16-bit timecode wraparound, unlike
// the ordering check in queue_frame() — confirm whether this is intended.
// Video is older than the oldest audio: it has no partner, so release it.
161 if (video_timecode < audio_timecode) {
162 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
164 video_frame_allocator->release_frame(pending_video_frames.front().frame);
165 pending_video_frames.pop_front();
// Audio is older than the oldest video: deliver it with an empty video frame.
166 } else if (audio_timecode < video_timecode) {
167 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
169 QueuedFrame audio_frame = pending_audio_frames.front();
170 pending_audio_frames.pop_front();
172 frame_callback(audio_timecode,
173 FrameAllocator::Frame(), 0, 0x0000,
174 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
// Timecodes match: pop both frames and deliver them together.
176 QueuedFrame video_frame = pending_video_frames.front();
177 QueuedFrame audio_frame = pending_audio_frames.front();
178 pending_audio_frames.pop_front();
179 pending_video_frames.pop_front();
// Debug dump of the pair to disk. NOTE(review): 'filename' is not declared in
// the visible lines; presumably this region is compiled out — confirm.
184 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
185 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
186 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
// The offsets tell the callback where the payload starts inside each frame.
189 frame_callback(video_timecode,
190 video_frame.frame, HEADER_SIZE, video_frame.format,
191 audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
194 if (has_dequeue_callbacks) {
195 dequeue_cleanup_callback();
199 void BMUSBCapture::start_new_frame(const uint8_t *start)
201 uint16_t format = (start[3] << 8) | start[2];
202 uint16_t timecode = (start[1] << 8) | start[0];
204 if (current_video_frame.len > 0) {
205 // If format is 0x0800 (no signal), add a fake (empty) audio
206 // frame to get it out of the queue.
207 // TODO: Figure out if there are other formats that come with
208 // no audio, and treat them the same.
209 if (format == 0x0800) {
210 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
211 if (fake_audio_frame.data == nullptr) {
212 // Oh well, it's just a no-signal frame anyway.
213 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
214 current_video_frame.owner->release_frame(current_video_frame);
215 current_video_frame = video_frame_allocator->alloc_frame();
218 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
221 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
223 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
225 // //start[7], start[6], start[5], start[4],
226 // read_current_frame, FRAME_SIZE);
228 current_video_frame = video_frame_allocator->alloc_frame();
229 //if (current_video_frame.data == nullptr) {
230 // read_current_frame = -1;
232 // read_current_frame = 0;
236 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
238 uint16_t format = (start[3] << 8) | start[2];
239 uint16_t timecode = (start[1] << 8) | start[0];
240 if (current_audio_frame.len > 0) {
241 //dump_audio_block();
242 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
244 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
245 // format, timecode, read_current_audio_block);
246 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
//   xfr    - transfer whose buffer contains the packet
//   offset - byte offset of the packet within xfr->buffer
//   pack   - packet descriptor; pack->actual_length bytes are printed
250 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
252 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
253 for (unsigned j = 0; j < pack->actual_length; j++) {
254 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
255 printf("%02x", xfr->buffer[j + offset]);
// The separator printed after each byte depends on its position; this branch
// handles every eighth byte.
258 else if ((j % 8) == 7)
// De-interleaves <n> bytes from <src> into two destinations: bytes at even
// source offsets go to <dest1>, bytes at odd source offsets go to <dest2>.
// Each destination receives n / 2 bytes.
//
// <n> must be even; callers are expected to handle a trailing odd byte
// themselves before calling this.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);
	uint8_t *dptr1 = dest1;
	uint8_t *dptr2 = dest2;

	for (size_t i = 0; i < n; i += 2) {
		*dptr1++ = *src++;
		*dptr2++ = *src++;
	}
}
// Slow-path append: copies the bytes in [start, end) onto the end of
// <current_frame>, advancing current_frame->len.
//   current_frame   - frame being filled
//   frame_type_name - label used in the overflow message
//   start, end      - source byte range
278 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Sanity check on the frame (no buffer, or a length already beyond its size).
280 if (current_frame->data == nullptr ||
281 current_frame->len > current_frame->size ||
286 int bytes = end - start;
// The data does not fit: record how many bytes were lost and mark the frame full.
287 if (current_frame->len + bytes > current_frame->size) {
288 current_frame->overflow = current_frame->len + bytes - current_frame->size;
289 current_frame->len = current_frame->size;
// Overflows above 1 MiB are reported here and the counter is reset.
290 if (current_frame->overflow > 1048576) {
291 printf("%d bytes overflow after last %s frame\n",
292 int(current_frame->overflow), frame_type_name);
293 current_frame->overflow = 0;
// Interleaved frames are split across two buffers (data and data2), each
// addressed at half the total length; odd lengths are handled one byte at a
// time before the pairwise copy.
297 if (current_frame->interleaved) {
298 uint8_t *data = current_frame->data + current_frame->len / 2;
299 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
300 if (current_frame->len % 2 == 1) {
304 if (bytes % 2 == 1) {
307 ++current_frame->len;
310 memcpy_interleaved(data, data2, start, bytes);
311 current_frame->len += bytes;
// Non-interleaved frames are one contiguous buffer.
313 memcpy(current_frame->data + current_frame->len, start, bytes);
314 current_frame->len += bytes;
// Debug helper: prints <name> followed by all 32 bytes of the AVX2 register
// <n> as two-digit hex values. The extraction index is written out literally
// for each byte, in groups of eight.
322 void avx2_dump(const char *name, __m256i n)
324 printf("%-10s:", name);
// Bytes 0-7.
325 printf(" %02x", _mm256_extract_epi8(n, 0));
326 printf(" %02x", _mm256_extract_epi8(n, 1));
327 printf(" %02x", _mm256_extract_epi8(n, 2));
328 printf(" %02x", _mm256_extract_epi8(n, 3));
329 printf(" %02x", _mm256_extract_epi8(n, 4));
330 printf(" %02x", _mm256_extract_epi8(n, 5));
331 printf(" %02x", _mm256_extract_epi8(n, 6));
332 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
334 printf(" %02x", _mm256_extract_epi8(n, 8));
335 printf(" %02x", _mm256_extract_epi8(n, 9));
336 printf(" %02x", _mm256_extract_epi8(n, 10));
337 printf(" %02x", _mm256_extract_epi8(n, 11));
338 printf(" %02x", _mm256_extract_epi8(n, 12));
339 printf(" %02x", _mm256_extract_epi8(n, 13));
340 printf(" %02x", _mm256_extract_epi8(n, 14));
341 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
343 printf(" %02x", _mm256_extract_epi8(n, 16));
344 printf(" %02x", _mm256_extract_epi8(n, 17));
345 printf(" %02x", _mm256_extract_epi8(n, 18));
346 printf(" %02x", _mm256_extract_epi8(n, 19));
347 printf(" %02x", _mm256_extract_epi8(n, 20));
348 printf(" %02x", _mm256_extract_epi8(n, 21));
349 printf(" %02x", _mm256_extract_epi8(n, 22));
350 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
352 printf(" %02x", _mm256_extract_epi8(n, 24));
353 printf(" %02x", _mm256_extract_epi8(n, 25));
354 printf(" %02x", _mm256_extract_epi8(n, 26));
355 printf(" %02x", _mm256_extract_epi8(n, 27));
356 printf(" %02x", _mm256_extract_epi8(n, 28));
357 printf(" %02x", _mm256_extract_epi8(n, 29));
358 printf(" %02x", _mm256_extract_epi8(n, 30));
359 printf(" %02x", _mm256_extract_epi8(n, 31));
364 // Does a memcpy and memchr in one to reduce processing time.
365 // Note that the benefit is somewhat limited if your L3 cache is small,
366 // as you'll (unfortunately) spend most of the time loading the data
369 // Complicated cases are left to the slow path; it basically stops copying
370 // up until the first instance of "sync_char" (usually a bit before, actually).
371 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
372 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame - frame being filled; len is advanced by the bytes copied
//   start, limit  - source byte range [start, limit)
//   sync_char     - byte whose presence in a SIMD block ends the fast copy
// Returns the source position where fast-path processing stopped (the final
// value of <in>); the caller continues from there on the slow path.
373 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
// Same frame sanity check as add_to_frame().
375 if (current_frame->data == nullptr ||
376 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes get special handling (not visible here;
// presumably left to the slow path — confirm).
380 size_t orig_bytes = limit - start;
381 if (orig_bytes < 128) {
386 // Don't read more bytes than we can write.
387 limit = min(limit, start + (current_frame->size - current_frame->len));
389 // Align end to 32 bytes.
390 limit = (const uint8_t *)(intptr_t(limit) & ~31);
392 if (start >= limit) {
396 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
397 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
398 if (aligned_start != start) {
// The unaligned head goes through add_to_frame(); if it already contains
// sync_char, only the bytes before it are copied.
399 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
400 if (sync_start == nullptr) {
401 add_to_frame(current_frame, "", start, aligned_start);
403 add_to_frame(current_frame, "", start, sync_start);
408 // Make the length a multiple of 64.
409 if (current_frame->interleaved) {
410 if (((limit - aligned_start) % 64) != 0) {
413 assert(((limit - aligned_start) % 64) == 0);
// 256-bit (AVX2) variant. NOTE(review): this and the 128-bit variant further
// down are presumably selected by a preprocessor conditional not visible here.
417 const __m256i needle = _mm256_set1_epi8(sync_char);
419 const __restrict __m256i *in = (const __m256i *)aligned_start;
420 if (current_frame->interleaved) {
// Even source bytes go to data, odd source bytes to data2; (len + 1) / 2
// rounds up for the first buffer when len is odd.
421 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
422 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
423 if (current_frame->len % 2 == 1) {
// Byte shuffle that gathers even-indexed bytes into the low half and
// odd-indexed bytes into the high half of each 128-bit lane.
427 __m256i shuffle_cw = _mm256_set_epi8(
428 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
429 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
430 while (in < (const __m256i *)limit) {
431 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
432 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
433 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Search both 32-byte blocks for sync_char before the data is rearranged.
435 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
436 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
437 __m256i found = _mm256_or_si256(found1, found2);
439 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
440 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
442 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
443 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1/data2 (even source bytes),
// hi the high halves (odd source bytes).
445 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
446 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
448 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
449 _mm256_storeu_si256(out2, hi);
// A nonzero compare mask means sync_char occurred somewhere in this block.
451 if (!_mm256_testz_si256(found, found)) {
// Interleaved path: len advances by the number of source bytes consumed.
459 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved path: straight 32-byte copy with the same sync_char test.
461 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
462 while (in < (const __m256i *)limit) {
463 __m256i data = _mm256_load_si256(in);
464 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
465 __m256i found = _mm256_cmpeq_epi8(data, needle);
466 if (!_mm256_testz_si256(found, found)) {
473 current_frame->len = (uint8_t *)out - current_frame->data;
// 128-bit (SSE) variant of the same algorithm.
476 const __m128i needle = _mm_set1_epi8(sync_char);
478 const __m128i *in = (const __m128i *)aligned_start;
479 if (current_frame->interleaved) {
480 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
481 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
482 if (current_frame->len % 2 == 1) {
// Masking keeps the low byte of each 16-bit pair; the shift exposes the
// high byte. packus then narrows each set of 16-bit values back to bytes.
486 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
487 while (in < (const __m128i *)limit) {
488 __m128i data1 = _mm_load_si128(in);
489 __m128i data2 = _mm_load_si128(in + 1);
490 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
491 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
492 __m128i data1_hi = _mm_srli_epi16(data1, 8);
493 __m128i data2_hi = _mm_srli_epi16(data2, 8);
494 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
495 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
496 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
497 _mm_storeu_si128(out2, hi);
498 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
499 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
500 if (!_mm_testz_si128(found1, found1) ||
501 !_mm_testz_si128(found2, found2)) {
509 current_frame->len += (uint8_t *)in - aligned_start;
511 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
512 while (in < (const __m128i *)limit) {
513 __m128i data = _mm_load_si128(in);
514 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
515 __m128i found = _mm_cmpeq_epi8(data, needle);
516 if (!_mm_testz_si128(found, found)) {
523 current_frame->len = (uint8_t *)out - current_frame->data;
527 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
529 return (const uint8_t *)in;
// Walks every isochronous packet of a completed transfer, appends its payload
// to <current_frame>, and calls <start_callback> each time the sync pattern
// is found in the data.
//   xfr             - completed isochronous transfer
//   sync_pattern    - byte sequence that marks the start of a new frame/block
//   current_frame   - frame currently being filled
//   frame_type_name - label passed on to add_to_frame() for messages
//   start_callback  - called with a pointer just past each sync pattern found
// NOTE(review): sync_length (the pattern length used below) is declared on a
// parameter line not visible here.
533 void decode_packs(const libusb_transfer *xfr,
534 const char *sync_pattern,
536 FrameAllocator::Frame *current_frame,
537 const char *frame_type_name,
538 function<void(const uint8_t *start)> start_callback)
541 for (int i = 0; i < xfr->num_iso_packets; i++) {
542 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
// Packets that did not complete successfully are reported on stderr.
544 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
545 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of each packet slot hold received data.
550 const uint8_t *start = xfr->buffer + offset;
551 const uint8_t *limit = start + pack->actual_length;
552 while (start < limit) { // Usually runs only one iteration.
// Bulk-copy with the SIMD fast path, which stops around the first byte equal
// to the first byte of the sync pattern.
554 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
555 if (start == limit) break;
556 assert(start < limit);
// Slow path: look for the complete sync pattern in what remains.
559 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
560 if (start_next_frame == nullptr) {
561 // add the rest of the buffer
562 add_to_frame(current_frame, frame_type_name, start, limit);
// Pattern found: finish the current frame up to it, then start a new one.
565 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
566 start = start_next_frame + sync_length; // skip sync
567 start_callback(start);
571 dump_pack(xfr, offset, pack);
// Packet slots are spaced pack->length bytes apart in the transfer buffer,
// regardless of how much data was actually received.
573 offset += pack->length;
// libusb completion callback shared by all transfers of this driver.
// Isochronous transfers are decoded into the current audio or video frame;
// control transfers are register reads whose result is stored in
// register_file. The transfer is resubmitted at the end.
577 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// A failed transfer is reported and freed.
579 if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
580 fprintf(stderr, "transfer status %d\n", xfr->status);
581 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture instance (set in configure_card()).
585 assert(xfr->user_data != nullptr);
586 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
588 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio, delimited by the "DeckLinkAudioResyncT" marker;
// the other branch handles video, delimited by 00 00 ff ff.
589 if (xfr->endpoint == 0x84) {
590 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
592 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
595 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
596 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
597 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): 'setup' is used below although its declaration above is
// commented out; presumably these prints are compiled out — confirm.
599 if (setup->wIndex == 44) {
600 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
602 printf("read register %2d: 0x%02x%02x%02x%02x\n",
603 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// around after NUM_BMUSB_REGISTERS.
606 memcpy(usb->register_file + usb->current_register, buf, 4);
607 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
608 if (usb->current_register == 0) {
609 // read through all of them
610 printf("register dump:");
611 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
612 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the same transfer buffer to read the next register.
616 libusb_fill_control_setup(xfr->buffer,
617 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
618 /*index=*/usb->current_register, /*length=*/4);
// Raw hex dump of the transfer buffer. NOTE(review): 'i' is not declared in
// the visible lines; presumably debug-only code — confirm.
623 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
624 for (i = 0; i < xfr->actual_length; i++) {
625 printf("%02x", xfr->buffer[i]);
// Queue the transfer again so capture continues.
635 if (libusb_submit_transfer(xfr) < 0) {
636 fprintf(stderr, "error re-submitting URB\n");
641 void BMUSBCapture::usb_thread_func()
644 memset(¶m, 0, sizeof(param));
645 param.sched_priority = 1;
646 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
647 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
649 while (!should_quit) {
650 int rc = libusb_handle_events(nullptr);
651 if (rc != LIBUSB_SUCCESS)
// One-time setup of the capture device: installs default frame allocators if
// none were set, starts the dequeue thread, initializes libusb, opens the
// device by vid/pid, selects configuration and alternate settings, writes the
// initial control registers, and allocates (but does not submit) the
// isochronous transfers, which are stored in iso_xfrs.
656 void BMUSBCapture::configure_card()
// Default allocators: FRAME_SIZE-byte buffers for video, 64 KiB for audio.
658 if (video_frame_allocator == nullptr) {
659 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
661 if (audio_frame_allocator == nullptr) {
662 set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
664 dequeue_thread_should_quit = false;
665 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
668 struct libusb_transfer *xfr;
// nullptr selects libusb's default context.
670 rc = libusb_init(nullptr);
672 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
676 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
677 //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
678 struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
680 fprintf(stderr, "Error finding USB device\n");
// Print the interfaces, alternate settings and endpoints of configuration 0.
684 libusb_config_descriptor *config;
685 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
687 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
690 printf("%d interface\n", config->bNumInterfaces);
691 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
692 printf(" interface %d\n", interface_number);
693 const libusb_interface *interface = &config->interface[interface_number];
694 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
695 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
696 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
697 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
698 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
699 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
704 rc = libusb_set_configuration(devh, /*configuration=*/1);
706 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
710 rc = libusb_claim_interface(devh, 0);
712 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
716 // Alternate setting 1 is output, alternate setting 2 is input.
717 // Card is reset when switching alternates, so the driver uses
718 // this “double switch” when it wants to reset.
720 // There's also alternate settings 3 and 4, which seem to be
721 // like 1 and 2 except they advertise less bandwidth needed.
722 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
724 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
727 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
// NOTE(review): this message says "alternate 1" although the call above
// selects alternate setting 2; the text looks copy-pasted.
729 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// NOTE(review): switching back to alternate 1 (output, per the comment above)
// would undo the input selection; presumably this call is compiled out — confirm.
733 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
735 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
741 rc = libusb_claim_interface(devh, 3);
743 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the device's register map (register index = wIndex).
749 // 44 is some kind of timer register (first 16 bits count upwards)
750 // 24 is some sort of watchdog?
751 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
752 // (or will go to 0x73c60010?), also seen 0x73c60100
753 // 12 also changes all the time, unclear why
754 // 16 seems to be autodetected mode somehow
755 // -- this is e00115e0 after reset?
756 // ed0115e0 after mode change [to output?]
757 // 2d0015e0 after more mode change [to input]
758 // ed0115e0 after more mode change
759 // 2d0015e0 after more mode change
761 // 390115e0 seems to indicate we have signal
762 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
764 // 200015e0 on startup
765 // changes to 250115e0 when we sync to the signal
767 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
769 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
771 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
772 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
774 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
775 // perhaps some of them are related to analog output?
777 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
778 // but the driver sets it to 0x8036802a at some point.
780 // all of this is on request 214/215. other requests (192, 219,
781 // 222, 223, 224) are used for firmware upgrade. Probably best to
782 // stay out of it unless you know what you're doing.
786 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
789 // 0x01 - stable signal
791 // 0x08 - unknown (audio??)
// Initial control transfers, issued in order. Each entry is
// { direction, request, register index, data }; request 214 is used with
// LIBUSB_ENDPOINT_IN and request 215 with LIBUSB_ENDPOINT_OUT.
801 static const ctrl ctrls[] = {
802 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
803 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
805 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
807 // clearing the 0x08000000 bit seems to change the capture format (other source?)
808 // 0x10000000 = analog audio instead of embedded audio, it seems
809 // 0x3a000000 = component video? (analog audio)
810 // 0x3c000000 = composite video? (analog audio)
811 // 0x3e000000 = s-video? (analog audio)
812 { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
813 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
814 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
815 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
816 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
819 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// Register data is sent in network (big-endian) byte order.
820 uint32_t flipped = htonl(ctrls[req].data);
821 static uint8_t value[4];
822 memcpy(value, &flipped, sizeof(flipped));
823 int size = sizeof(value);
824 //if (ctrls[req].request == 215) size = 0;
// Synchronous transfer; timeout 0 means wait indefinitely.
825 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
826 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
828 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// On success rc is the number of bytes transferred; print them as hex.
832 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
833 for (int i = 0; i < rc; ++i) {
834 printf("%02x", value[i]);
// Synchronous read of a single register selected by my_index.
// NOTE(review): presumably debug-only code — confirm.
842 static int my_index = 0;
843 static uint8_t value[4];
844 int size = sizeof(value);
845 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
846 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
848 fprintf(stderr, "Error on control\n");
851 printf("rc=%d index=%d: 0x", rc, my_index);
852 for (int i = 0; i < rc; ++i) {
853 printf("%02x", value[i]);
// The command buffers below are static so they outlive this function while
// the asynchronous transfers are in flight.
860 // set up an asynchronous transfer of the timer register
861 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
862 static int completed = 0;
864 xfr = libusb_alloc_transfer(0);
865 libusb_fill_control_setup(cmdbuf,
866 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
867 /*index=*/44, /*length=*/4);
868 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// user_data is replaced with this object, since cb_xfr() casts it to BMUSBCapture.
869 xfr->user_data = this;
870 libusb_submit_transfer(xfr);
872 // set up an asynchronous transfer of register 24
873 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
874 static int completed2 = 0;
876 xfr = libusb_alloc_transfer(0);
877 libusb_fill_control_setup(cmdbuf2,
878 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
879 /*index=*/24, /*length=*/4);
880 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
881 xfr->user_data = this;
882 libusb_submit_transfer(xfr);
885 // set up an asynchronous transfer of the register dump
886 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
887 static int completed3 = 0;
889 xfr = libusb_alloc_transfer(0);
890 libusb_fill_control_setup(cmdbuf3,
891 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
892 /*index=*/current_register, /*length=*/4);
893 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
894 xfr->user_data = this;
// The register-dump transfer is prepared but not submitted.
895 //libusb_submit_transfer(xfr);
// File used by dump_audio_block().
897 audiofp = fopen("audio.raw", "wb");
899 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4; cb_xfr() treats 0x84 (IN | 4) as audio.
900 for (int e = 3; e <= 4; ++e) {
901 //int num_transfers = (e == 3) ? 6 : 6;
902 int num_transfers = 6;
903 for (int i = 0; i < num_transfers; ++i) {
904 int num_iso_pack, size;
906 // Video seems to require isochronous packets scaled with the width;
907 // seemingly six lines is about right, rounded up to the required 1kB
909 size = WIDTH * 2 * 6;
910 // Note that for 10-bit input, you'll need to increase size accordingly.
911 //size = size * 4 / 3;
912 if (size % 1024 != 0) {
916 num_iso_pack = (2 << 18) / size; // 512 kB.
917 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
// One contiguous buffer holds all packets of the transfer.
922 int num_bytes = num_iso_pack * size;
923 uint8_t *buf = new uint8_t[num_bytes];
925 xfr = libusb_alloc_transfer(num_iso_pack);
927 fprintf(stderr, "oom\n");
931 int ep = LIBUSB_ENDPOINT_IN | e;
932 libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
933 num_iso_pack, cb_xfr, nullptr, 0);
934 libusb_set_iso_packet_lengths(xfr, size);
935 xfr->user_data = this;
// Kept for start_bm_capture(), which submits them.
936 iso_xfrs.push_back(xfr);
// Starts capturing by submitting every isochronous transfer prepared in
// configure_card() (stored in iso_xfrs). Submission errors are reported on
// stderr with the endpoint and libusb error name.
941 void BMUSBCapture::start_bm_capture()
943 printf("starting capture\n");
945 for (libusb_transfer *xfr : iso_xfrs) {
946 printf("submitting transfer...\n");
947 int rc = libusb_submit_transfer(xfr);
950 //printf("num_bytes=%d\n", num_bytes);
// NOTE(review): 'i' (the transfer number) is not declared in the visible lines.
951 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
952 xfr->endpoint, i, libusb_error_name(rc));
// Teardown calls. NOTE(review): 'devh' is not declared in the visible lines of
// this function, and releasing the interface right after submitting would
// stop the capture; presumably this region is compiled out — confirm.
959 libusb_release_interface(devh, 0);
963 libusb_exit(nullptr);
968 void BMUSBCapture::stop_dequeue_thread()
970 dequeue_thread_should_quit = true;
971 queues_not_empty.notify_all();
972 dequeue_thread.join();
975 void BMUSBCapture::start_bm_thread()
978 usb_thread = thread(&BMUSBCapture::usb_thread_func);
981 void BMUSBCapture::stop_bm_thread()