1 // Intensity Shuttle USB3 capture driver, v0.7.6
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
64 atomic<bool> should_quit;
// Returns the size in bytes of one line of v210-packed video that is
// "width" pixels wide: every started group of six pixels takes up four
// 32-bit words.
int v210_stride(int width)
{
	const int groups_of_six = (width + 5) / 6;
	return groups_of_six * 4 * sizeof(uint32_t);
}
// Computes the isochronous packet size to use for video of the given width
// and pixel format. The size is based on six lines' worth of bytes, where a
// line uses the v210 stride for PixelFormat_10BitYCbCr and two bytes per
// pixel otherwise.
// NOTE(review): the statements that perform the 1 kB round-up and return the
// result are not visible in this excerpt; confirm against the full file.
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
// Sizes that are already a multiple of 1024 are left as they are.
84 if (size % 1024 != 0) {
// Re-partitions the isochronous transfer "xfr" into packets of the size that
// find_xfer_size_for_width() gives for this pixel format and width.
// The number of packets is xfr->length divided by that packet size.
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
95 int num_iso_pack = xfr->length / size;
// Only touch the transfer if the packet count or the (first) packet length
// actually differs from what it is currently set up with.
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the lookup table in decode_video_format(): a masked format code
// and the geometry and frame rate that it maps to.
// NOTE(review): the table rows carry a trailing bool and the lookup reads
// entry.interlaced, so a member of that name presumably follows; it is not
// visible in this excerpt.
103 struct VideoFormatEntry {
// Compared against the incoming format code with the 0xe80c bits cleared.
104 uint16_t normalized_video_format;
105 unsigned width, height, second_field_start;
106 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate expressed as the fraction frame_rate_nom / frame_rate_den.
107 unsigned frame_rate_nom, frame_rate_den;
111 // Get details for the given video format; returns false if detection was incomplete.
//
// video_format is the raw 16-bit format code from the frame header;
// *decoded_video_format is filled in with size, stride, frame rate and
// interlacing information. Formats are handled in this order: the no-signal
// code 0x0800, codes without the 0xe000 bits, NTSC, PAL, the lookup table,
// and finally a 720p60 fallback for unknown codes.
// NOTE(review): the return statements are not visible in this excerpt.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
// Anything that does not have all three of the top bits set is reported as
// an empty (0x0) frame with no signal, at a nominal 60 Hz.
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
// The 0x0800 bit is ignored for matching; it selects between the two-bytes-
// per-pixel stride and the v210 stride further down.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask 0xe80c clears the three top bits, the 0x0800 bit and the 0x8/0x4
// flags before the table lookup.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: code, width, height, second_field_start, extra_lines_top,
// extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
203 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
204 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
205 { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
206 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
207 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
208 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
209 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
210 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
211 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
212 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
213 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
214 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
215 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
216 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
218 for (const VideoFormatEntry &entry : entries) {
219 if (normalized_video_format == entry.normalized_video_format) {
220 decoded_video_format->width = entry.width;
221 decoded_video_format->height = entry.height;
// Same stride selection on the 0x0800 bit as in the NTSC/PAL cases above.
222 if (video_format & 0x0800) {
223 decoded_video_format->stride = entry.width * 2;
225 decoded_video_format->stride = v210_stride(entry.width);
227 decoded_video_format->second_field_start = entry.second_field_start;
228 decoded_video_format->extra_lines_top = entry.extra_lines_top;
229 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
230 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
231 decoded_video_format->frame_rate_den = entry.frame_rate_den;
232 decoded_video_format->interlaced = entry.interlaced;
// No table entry matched: report it and fall back to 1280x720 at 60 fps.
237 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
238 decoded_video_format->width = 1280;
239 decoded_video_format->height = 720;
240 decoded_video_format->stride = 1280 * 2;
241 decoded_video_format->frame_rate_nom = 60;
242 decoded_video_format->frame_rate_den = 1;
246 // There are seemingly no direct indicators of sample rate; you just get
247 // one frame's worth and have to guess from that.
248 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
250 size_t num_samples = len / 3 / 8;
251 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
253 // See if we match or are very close to any of the mandatory HDMI sample rates.
254 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
255 for (int rate : candidate_sample_rates) {
256 if (abs(int(num_samples_per_second) - rate) <= 100) {
261 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
262 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
268 FrameAllocator::~FrameAllocator() {}
270 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
271 : frame_size(frame_size)
273 for (size_t i = 0; i < num_queued_frames; ++i) {
274 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist, holding freelist_mutex while doing
// so. When the freelist is empty, a "Frame overrun" message is printed
// instead of a buffer being taken.
// NOTE(review): the declaration of vf, the printf argument and the return
// statement are not visible in this excerpt. The message uses %ld for a
// size; confirm that the argument passed is really a long.
278 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
283 unique_lock<mutex> lock(freelist_mutex); // Meh.
284 if (freelist.empty()) {
285 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the buffer from the unique_ptr before the (now empty)
// element is popped, so ownership moves to vf.data as a raw pointer.
288 vf.data = freelist.top().release();
289 vf.size = frame_size;
290 freelist.pop(); // Meh.
295 void MallocFrameAllocator::release_frame(Frame frame)
297 if (frame.overflow > 0) {
298 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
300 unique_lock<mutex> lock(freelist_mutex);
301 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if b comes strictly after a when both are treated as 16-bit
// counters that wrap around, i.e. when the forward distance from a to b
// (modulo 65536) is at least 1 and less than 0x8000. Equal values are
// never "less than" each other.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
// Appends a frame with the given format and timecode to the queue *q and
// wakes up a waiter on queues_not_empty. Everything happens under queue_lock.
316 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
318 unique_lock<mutex> lock(queue_lock);
// A timecode that is not strictly after the last queued one (with 16-bit
// wraparound) is reported, and the frame is given back to its allocator.
319 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
320 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
321 q->back().timecode, timecode);
322 frame.owner->release_frame(frame);
328 qf.timecode = timecode;
330 q->push_back(move(qf));
331 queues_not_empty.notify_one(); // might be spurious
334 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
336 FILE *fp = fopen(filename, "wb");
337 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
338 printf("short write!\n");
343 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
345 if (audiofp != nullptr) {
346 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: pairs up queued video and audio frames by
// timecode and hands them to frame_callback until dequeue_thread_should_quit
// is set. The optional init/cleanup callbacks run at the start and end.
350 void BMUSBCapture::dequeue_thread_func()
352 char thread_name[16];
353 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
354 pthread_setname_np(pthread_self(), thread_name);
356 if (has_dequeue_callbacks) {
357 dequeue_init_callback();
// Used for audio whenever no new rate has been guessed for a frame.
359 size_t last_sample_rate = 48000;
360 while (!dequeue_thread_should_quit) {
361 unique_lock<mutex> lock(queue_lock);
// Sleep until there is at least one video AND one audio frame, or we are
// asked to quit.
362 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
364 if (dequeue_thread_should_quit) break;
366 uint16_t video_timecode = pending_video_frames.front().timecode;
367 uint16_t audio_timecode = pending_audio_frames.front().timecode;
368 AudioFormat audio_format;
369 audio_format.bits_per_sample = 24;
370 audio_format.num_channels = 8;
371 audio_format.sample_rate = last_sample_rate;
// Case 1: the video frame is older than the oldest audio block; it is
// dropped and returned to the allocator.
372 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
373 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
375 QueuedFrame video_frame = pending_video_frames.front();
376 pending_video_frames.pop_front();
378 video_frame_allocator->release_frame(video_frame.frame);
// Case 2: the audio block is older than the oldest video frame; the audio is
// delivered together with an empty video frame.
379 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
380 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
382 QueuedFrame audio_frame = pending_audio_frames.front();
383 pending_audio_frames.pop_front();
385 audio_format.id = audio_frame.format;
387 // Use the video format of the pending frame.
388 QueuedFrame video_frame = pending_video_frames.front();
389 VideoFormat video_format;
390 decode_video_format(video_frame.format, &video_format);
392 frame_callback(audio_timecode,
393 FrameAllocator::Frame(), 0, video_format,
394 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Case 3 (neither timecode is ahead of the other): both frames are taken off
// their queues and delivered as a pair.
396 QueuedFrame video_frame = pending_video_frames.front();
397 QueuedFrame audio_frame = pending_audio_frames.front();
398 pending_audio_frames.pop_front();
399 pending_video_frames.pop_front();
// NOTE(review): the dump calls below look like debugging code; whether they
// are compiled in is not visible in this excerpt.
404 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
405 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
406 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
409 VideoFormat video_format;
410 audio_format.id = audio_frame.format;
411 if (decode_video_format(video_frame.format, &video_format)) {
// Only re-guess the sample rate when there actually is audio data.
412 if (audio_frame.frame.len != 0) {
413 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
414 last_sample_rate = audio_format.sample_rate;
416 frame_callback(video_timecode,
417 video_frame.frame, HEADER_SIZE, video_format,
418 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Presumably the branch for a format that could not be decoded (the "else"
// line is not visible here): the video frame is released and only the audio
// is delivered, at the last known sample rate.
420 video_frame_allocator->release_frame(video_frame.frame);
421 audio_format.sample_rate = last_sample_rate;
422 frame_callback(video_timecode,
423 FrameAllocator::Frame(), 0, video_format,
424 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
428 if (has_dequeue_callbacks) {
429 dequeue_cleanup_callback();
// Called with a pointer to the bytes following a video sync marker. Queues
// the video frame collected so far (if it has any data) and allocates a
// fresh frame for the data that follows.
433 void BMUSBCapture::start_new_frame(const uint8_t *start)
// Both fields are 16-bit little-endian: timecode in bytes 0-1, format in
// bytes 2-3.
435 uint16_t format = (start[3] << 8) | start[2];
436 uint16_t timecode = (start[1] << 8) | start[0];
438 if (current_video_frame.len > 0) {
439 current_video_frame.received_timestamp = steady_clock::now();
441 // If format is 0x0800 (no signal), add a fake (empty) audio
442 // frame to get it out of the queue.
443 // TODO: Figure out if there are other formats that come with
444 // no audio, and treat them the same.
445 if (format == 0x0800) {
446 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
447 if (fake_audio_frame.data == nullptr) {
448 // Oh well, it's just a no-signal frame anyway.
449 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
450 current_video_frame.owner->release_frame(current_video_frame);
451 current_video_frame = video_frame_allocator->alloc_frame();
454 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
457 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
459 // Update the assumed frame width. We might be one frame too late on format changes,
460 // but it's much better than asking the user to choose manually.
461 VideoFormat video_format;
462 if (decode_video_format(format, &video_format)) {
463 assumed_frame_width = video_format.width;
466 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
468 // //start[7], start[6], start[5], start[4],
469 // read_current_frame, FRAME_SIZE);
471 current_video_frame = video_frame_allocator->alloc_frame();
472 //if (current_video_frame.data == nullptr) {
473 // read_current_frame = -1;
475 // read_current_frame = 0;
// Called with a pointer to the bytes following an audio sync marker. Queues
// the audio block collected so far (if it has any data) and allocates a
// fresh frame for the data that follows.
479 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Same header layout as for video: little-endian timecode in bytes 0-1 and
// format in bytes 2-3.
481 uint16_t format = (start[3] << 8) | start[2];
482 uint16_t timecode = (start[1] << 8) | start[0];
483 if (current_audio_frame.len > 0) {
484 current_audio_frame.received_timestamp = steady_clock::now();
485 //dump_audio_block();
486 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
488 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
489 // format, timecode);
490 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debugging aid: prints the received bytes of one isochronous packet as hex.
// "offset" is the position of the packet's data within xfr->buffer, and
// pack->actual_length bytes are printed.
// NOTE(review): the separator logic between bytes is only partly visible in
// this excerpt.
494 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
496 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
497 for (unsigned j = 0; j < pack->actual_length; j++) {
498 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
499 printf("%02x", xfr->buffer[j + offset]);
502 else if ((j % 8) == 7)
// Copies n bytes from src into the two destinations dest1 and dest2,
// consuming the source two bytes per loop iteration.
// NOTE(review): the loop body is not visible in this excerpt; presumably
// even-indexed bytes go to dest1 and odd-indexed bytes to dest2 -- confirm.
510 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
513 uint8_t *dptr1 = dest1;
514 uint8_t *dptr2 = dest2;
516 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to current_frame, updating its len.
// frame_type_name is only used in the overflow message. Data that does not
// fit in the frame is counted in current_frame->overflow rather than stored.
// NOTE(review): the rest of the initial validity check and its early exit
// are not visible in this excerpt.
522 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
524 if (current_frame->data == nullptr ||
525 current_frame->len > current_frame->size ||
530 int bytes = end - start;
// Would overflow the buffer: record by how much and mark the frame as full.
531 if (current_frame->len + bytes > current_frame->size) {
532 current_frame->overflow = current_frame->len + bytes - current_frame->size;
533 current_frame->len = current_frame->size;
// Only overflows of more than 1 MiB are reported here, after which the
// counter is reset.
534 if (current_frame->overflow > 1048576) {
535 printf("%d bytes overflow after last %s frame\n",
536 int(current_frame->overflow), frame_type_name);
537 current_frame->overflow = 0;
// If a secondary buffer is present, it receives the same bytes at the same
// offset.
541 if (current_frame->data_copy != nullptr) {
542 memcpy(current_frame->data_copy + current_frame->len, start, bytes);
// Interleaved frames are split between data and data2, each at offset len/2;
// odd current lengths and odd byte counts get special handling first.
544 if (current_frame->interleaved) {
545 uint8_t *data = current_frame->data + current_frame->len / 2;
546 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
547 if (current_frame->len % 2 == 1) {
551 if (bytes % 2 == 1) {
554 ++current_frame->len;
557 memcpy_interleaved(data, data2, start, bytes);
558 current_frame->len += bytes;
// Non-interleaved frames are a plain append.
560 memcpy(current_frame->data + current_frame->len, start, bytes);
561 current_frame->len += bytes;
// Debugging aid: prints "name" left-aligned in a ten-character column,
// followed by each of the 32 bytes of the AVX2 register n in hex, lowest
// byte index first. The byte index is spelled out in every call rather than
// taken from a loop variable.
567 void avx2_dump(const char *name, __m256i n)
569 printf("%-10s:", name);
// Bytes 0-7.
570 printf(" %02x", _mm256_extract_epi8(n, 0));
571 printf(" %02x", _mm256_extract_epi8(n, 1));
572 printf(" %02x", _mm256_extract_epi8(n, 2));
573 printf(" %02x", _mm256_extract_epi8(n, 3));
574 printf(" %02x", _mm256_extract_epi8(n, 4));
575 printf(" %02x", _mm256_extract_epi8(n, 5));
576 printf(" %02x", _mm256_extract_epi8(n, 6));
577 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
579 printf(" %02x", _mm256_extract_epi8(n, 8));
580 printf(" %02x", _mm256_extract_epi8(n, 9));
581 printf(" %02x", _mm256_extract_epi8(n, 10));
582 printf(" %02x", _mm256_extract_epi8(n, 11));
583 printf(" %02x", _mm256_extract_epi8(n, 12));
584 printf(" %02x", _mm256_extract_epi8(n, 13));
585 printf(" %02x", _mm256_extract_epi8(n, 14));
586 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
588 printf(" %02x", _mm256_extract_epi8(n, 16));
589 printf(" %02x", _mm256_extract_epi8(n, 17));
590 printf(" %02x", _mm256_extract_epi8(n, 18));
591 printf(" %02x", _mm256_extract_epi8(n, 19));
592 printf(" %02x", _mm256_extract_epi8(n, 20));
593 printf(" %02x", _mm256_extract_epi8(n, 21));
594 printf(" %02x", _mm256_extract_epi8(n, 22));
595 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
597 printf(" %02x", _mm256_extract_epi8(n, 24));
598 printf(" %02x", _mm256_extract_epi8(n, 25));
599 printf(" %02x", _mm256_extract_epi8(n, 26));
600 printf(" %02x", _mm256_extract_epi8(n, 27));
601 printf(" %02x", _mm256_extract_epi8(n, 28));
602 printf(" %02x", _mm256_extract_epi8(n, 29));
603 printf(" %02x", _mm256_extract_epi8(n, 30));
604 printf(" %02x", _mm256_extract_epi8(n, 31));
609 #ifndef HAS_MULTIVERSIONING
// Variant of add_to_frame_fastpath() for builds where HAS_MULTIVERSIONING is
// not defined.
// NOTE(review): the body is not visible in this excerpt; per the comment
// below it does no fast-path work -- confirm what it returns.
611 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
613 // No fast path possible unless we have multiversioning.
617 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two target-specific versions of the inner
// copy-and-scan loop (one for SSE4.1, one for AVX2), so that
// add_to_frame_fastpath() can call them before they are defined.
619 __attribute__((target("sse4.1")))
620 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
622 __attribute__((target("avx2")))
623 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
625 // Does a memcpy and memchr in one to reduce processing time.
626 // Note that the benefit is somewhat limited if your L3 cache is small,
627 // as you'll (unfortunately) spend most of the time loading the data
630 // Complicated cases are left to the slow path; it basically stops copying
631 // up until the first instance of "sync_char" (usually a bit before, actually).
632 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
633 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// This is the version used for the "default" target, i.e. when neither of
// the SIMD-specific versions applies.
// NOTE(review): its body is not visible in this excerpt.
634 __attribute__((target("default")))
635 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
637 // No fast path possible unless we have SSE 4.1 or higher.
// SSE4.1/AVX2 version of the fast path. It trims [start, limit) down to a
// 32-byte-aligned range that fits in the frame, copies the unaligned head
// with add_to_frame(), and passes the aligned range on to
// add_to_frame_fastpath_core().
// NOTE(review): the early-exit statements and their return values are not
// visible in this excerpt.
641 __attribute__((target("sse4.1", "avx2")))
642 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
644 if (current_frame->data == nullptr ||
645 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes get special treatment (not visible here).
649 size_t orig_bytes = limit - start;
650 if (orig_bytes < 128) {
655 // Don't read more bytes than we can write.
656 limit = min(limit, start + (current_frame->size - current_frame->len));
658 // Align end to 32 bytes.
659 limit = (const uint8_t *)(intptr_t(limit) & ~31);
661 if (start >= limit) {
665 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
666 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
667 if (aligned_start != start) {
// If the sync character shows up in the unaligned head, only the bytes in
// front of it are copied.
668 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
669 if (sync_start == nullptr) {
670 add_to_frame(current_frame, "", start, aligned_start);
672 add_to_frame(current_frame, "", start, sync_start);
677 // Make the length a multiple of 64.
// This is only done for interleaved frames, whose core loops consume two
// vectors per iteration.
678 if (current_frame->interleaved) {
679 if (((limit - aligned_start) % 64) != 0) {
682 assert(((limit - aligned_start) % 64) == 0);
685 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 version of the inner loop. Copies 32-byte vectors from aligned_start
// towards limit into current_frame while comparing every byte against
// sync_char, then advances current_frame->len by the number of bytes copied
// and returns the input position it reached.
// NOTE(review): what happens inside the "found" branches (and the pointer
// increments) is not visible in this excerpt.
688 __attribute__((target("avx2")))
689 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char replicated into all 32 byte lanes, for the byte-wise compares.
691 const __m256i needle = _mm256_set1_epi8(sync_char);
694 const __restrict __m256i *in = (const __m256i *)aligned_start;
695 if (current_frame->interleaved) {
// Interleaved output: even-indexed input bytes go to data, odd-indexed ones
// to data2.
696 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
697 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
698 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-indexed bytes to the low half, odd-indexed
// bytes to the high half.
702 __m256i shuffle_cw = _mm256_set_epi8(
703 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
704 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
705 while (in < (const __m256i *)limit) {
706 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
707 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
708 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Check both input vectors for sync_char in one go.
710 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
711 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
712 __m256i found = _mm256_or_si256(found1, found2);
714 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
715 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
717 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
718 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo takes the low 128 bits of data1 and data2, hi takes their high 128 bits.
720 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
721 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
723 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
724 _mm256_storeu_si256(out2, hi);
726 if (!_mm256_testz_si256(found, found)) {
734 bytes_copied = (uint8_t *)in - aligned_start;
// Non-interleaved output: a straight copy, one vector at a time.
736 uint8_t *old_end = current_frame->data + current_frame->len;
737 __m256i *out = (__m256i *)old_end;
738 while (in < (const __m256i *)limit) {
739 __m256i data = _mm256_load_si256(in);
740 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
741 __m256i found = _mm256_cmpeq_epi8(data, needle);
742 if (!_mm256_testz_si256(found, found)) {
749 bytes_copied = (uint8_t *)out - old_end;
751 if (current_frame->data_copy != nullptr) {
752 // TODO: It would be somewhat more cache-efficient to write this in the
753 // same loop as above. However, it might not be worth the extra complexity.
754 memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
756 current_frame->len += bytes_copied;
758 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
759 return (const uint8_t *)in;
// SSE4.1 version of the inner loop. Same contract as the AVX2 version, but
// working on 16-byte vectors: copies from aligned_start towards limit into
// current_frame while comparing every byte against sync_char, then advances
// current_frame->len and returns the input position it reached.
// NOTE(review): what happens inside the "found" branches (and the pointer
// increments) is not visible in this excerpt.
762 __attribute__((target("sse4.1")))
763 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char replicated into all 16 byte lanes, for the byte-wise compares.
765 const __m128i needle = _mm_set1_epi8(sync_char);
767 const __m128i *in = (const __m128i *)aligned_start;
769 if (current_frame->interleaved) {
770 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
771 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
772 if (current_frame->len % 2 == 1) {
// Deinterleave by treating the input as 16-bit words: masking keeps the low
// byte of each word, shifting right by 8 keeps the high byte; packus then
// narrows two such vectors into one vector of bytes.
776 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
777 while (in < (const __m128i *)limit) {
778 __m128i data1 = _mm_load_si128(in);
779 __m128i data2 = _mm_load_si128(in + 1);
780 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
781 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
782 __m128i data1_hi = _mm_srli_epi16(data1, 8);
783 __m128i data2_hi = _mm_srli_epi16(data2, 8);
784 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
785 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
786 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
787 _mm_storeu_si128(out2, hi);
788 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
789 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
790 if (!_mm_testz_si128(found1, found1) ||
791 !_mm_testz_si128(found2, found2)) {
799 bytes_copied = (uint8_t *)in - aligned_start;
// Non-interleaved output: a straight copy, one vector at a time.
801 uint8_t *old_end = current_frame->data + current_frame->len;
802 __m128i *out = (__m128i *)old_end;
803 while (in < (const __m128i *)limit) {
804 __m128i data = _mm_load_si128(in);
805 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
806 __m128i found = _mm_cmpeq_epi8(data, needle);
807 if (!_mm_testz_si128(found, found)) {
814 bytes_copied = (uint8_t *)out - old_end;
816 if (current_frame->data_copy != nullptr) {
817 // TODO: It would be somewhat more cache-efficient to write this in the
818 // same loop as above. However, it might not be worth the extra complexity.
819 memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
821 current_frame->len += bytes_copied;
823 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
824 return (const uint8_t *)in;
827 #endif // defined(HAS_MULTIVERSIONING)
// Walks through all isochronous packets of a completed transfer and appends
// their payload to *current_frame. Whenever sync_pattern is found in the
// data, the bytes before it are added to the current frame and
// start_callback is invoked with a pointer to the byte just after the
// pattern. frame_type_name is passed on to add_to_frame() for its messages.
// NOTE(review): the sync_length parameter and the offset declaration are
// used below but their declarations are not visible in this excerpt.
829 void decode_packs(const libusb_transfer *xfr,
830 const char *sync_pattern,
832 FrameAllocator::Frame *current_frame,
833 const char *frame_type_name,
834 function<void(const uint8_t *start)> start_callback)
837 for (int i = 0; i < xfr->num_iso_packets; i++) {
838 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
840 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
841 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet's slot in the buffer hold data.
846 const uint8_t *start = xfr->buffer + offset;
847 const uint8_t *limit = start + pack->actual_length;
848 while (start < limit) { // Usually runs only one iteration.
// Let the fast path consume as much as it can; it scans for the first byte
// of the sync pattern only.
849 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
850 if (start == limit) break;
851 assert(start < limit);
// Slow path: search for the complete sync pattern in what is left.
853 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
854 if (start_next_frame == nullptr) {
855 // add the rest of the buffer
856 add_to_frame(current_frame, frame_type_name, start, limit);
859 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
860 start = start_next_frame + sync_length; // skip sync
861 start_callback(start);
865 dump_pack(xfr, offset, pack);
// Slots in the buffer are spaced by the requested packet length, not by the
// number of bytes actually received.
867 offset += pack->length;
// Completion handler for USB transfers; xfr->user_data holds the owning
// BMUSBCapture. Isochronous transfers are decoded into audio or video
// frames, control transfers are stored into the register file, and the
// transfer is then submitted again.
// NOTE(review): several returns and preprocessor lines are not visible in
// this excerpt; the printing code that reads "setup" and "i" refers to names
// whose declarations are commented out or missing here, so it is presumably
// compiled out.
871 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// Any status other than "completed" or "device gone" is reported, and the
// transfer is freed.
873 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
874 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
875 fprintf(stderr, "error: transfer status %d\n", xfr->status);
876 libusb_free_transfer(xfr);
880 assert(xfr->user_data != nullptr);
881 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
883 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
// The disconnected flag makes sure the message and the callback happen only
// once per device.
884 if (!usb->disconnected) {
885 fprintf(stderr, "Device went away, stopping transfers.\n");
886 usb->disconnected = true;
887 if (usb->card_disconnected_callback) {
888 usb->card_disconnected_callback();
891 // Don't reschedule the transfer; the loop will stop by itself.
895 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio; the other branch handles video, which uses
// the four-byte 00 00 ff ff marker.
896 if (xfr->endpoint == 0x84) {
897 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
899 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
901 // Update the transfer with the new assumed width, if we're in the process of changing formats.
902 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
905 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
906 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
907 uint8_t *buf = libusb_control_transfer_get_data(xfr);
909 if (setup->wIndex == 44) {
910 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
912 printf("read register %2d: 0x%02x%02x%02x%02x\n",
913 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and step to the next register, wrapping
// around at NUM_BMUSB_REGISTERS.
916 memcpy(usb->register_file + usb->current_register, buf, 4);
917 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
918 if (usb->current_register == 0) {
919 // read through all of them
920 printf("register dump:");
921 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
922 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Set the transfer up to read the next register (vendor request 214).
926 libusb_fill_control_setup(xfr->buffer,
927 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
928 /*index=*/usb->current_register, /*length=*/4);
933 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
934 for (i = 0; i < xfr->actual_length; i++) {
935 printf("%02x", xfr->buffer[i]);
945 int rc = libusb_submit_transfer(xfr);
947 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. When a card_connected_callback is registered and
// the device is one of the two supported Blackmagic products (0xbd3b or
// 0xbd4f), the device is handed to that callback; otherwise the reference
// to the device is dropped.
// NOTE(review): the return statements are not visible in this excerpt.
952 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
954 if (card_connected_callback != nullptr) {
955 libusb_device_descriptor desc;
956 if (libusb_get_device_descriptor(dev, &desc) < 0) {
957 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
958 libusb_unref_device(dev);
962 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
963 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
964 card_connected_callback(dev); // Callback takes ownership.
968 libusb_unref_device(dev);
// Entry point of the USB event thread started by start_bm_thread().
//
// Requests SCHED_RR scheduling with priority 1 for the calling thread (pid 0);
// a failure is only logged. It then names the thread "bmusb_usb_drv" and pumps
// libusb events on the default context, waking at least once per second, for
// as long as should_quit is false.
// NOTE(review): the statement executed when libusb_handle_events_timeout()
// fails is not visible in this listing.
972 void BMUSBCapture::usb_thread_func()
975 memset(&param, 0, sizeof(param));
976 param.sched_priority = 1;
977 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
978 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
980 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
981 while (!should_quit) {
// One-second timeout so the loop re-checks should_quit regularly.
982 timeval sec { 1, 0 };
983 int rc = libusb_handle_events_timeout(nullptr, &sec);
984 if (rc != LIBUSB_SUCCESS)
// One supported capture card as collected by find_all_cards(). Entries are
// built there from { product ID, bus number, port number, device pointer }.
991 struct USBCardDevice {
// libusb device for this card. find_all_cards() leaves its reference in place
// (the device list is freed without unreferencing), so users of the list call
// libusb_unref_device() on it when done.
994 libusb_device *device;
// Maps a Blackmagic USB product ID to a human-readable card name:
// 0xbd3b is "Intensity Shuttle", 0xbd4f is "UltraStudio SDI".
// NOTE(review): the handling of any other product ID is not visible here.
997 const char *get_product_name(uint16_t product)
999 if (product == 0xbd3b) {
1000 return "Intensity Shuttle";
1001 } else if (product == 0xbd4f) {
1002 return "UltraStudio SDI";
// Builds the one-line description of a card, of the form
// "USB card <id>: Bus <bus> Device <port> <product name>".
// Note that the number printed after "Device" is the port argument; both
// callers in this file pass the result of libusb_get_port_number() for it.
1009 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
1011 const char *product_name = get_product_name(product);
1014 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
1015 id, bus, port, product_name);
// Enumerates all USB devices on the default libusb context and collects the
// supported Blackmagic cards (product IDs 0xbd3b and 0xbd4f) into a vector,
// sorted so that card indices are stable between calls.
//
// Devices that do not match are unreferenced here. Matching devices keep their
// reference, and the list itself is freed with unref_devices = 0, so whoever
// consumes the result must libusb_unref_device() each entry.
// NOTE(review): the statements that follow the two error messages are not
// visible in this listing.
1019 vector<USBCardDevice> find_all_cards()
1021 libusb_device **devices;
1022 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
1023 if (num_devices == -1) {
1024 fprintf(stderr, "Error finding USB devices\n");
1027 vector<USBCardDevice> found_cards;
1028 for (ssize_t i = 0; i < num_devices; ++i) {
1029 libusb_device_descriptor desc;
1030 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1031 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1035 uint8_t bus = libusb_get_bus_number(devices[i]);
1036 uint8_t port = libusb_get_port_number(devices[i]);
// Not one of the supported cards: give up our reference to it.
1038 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1039 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1040 libusb_unref_device(devices[i]);
1044 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Free only the array; the second argument (0) keeps the device references alive.
1046 libusb_free_device_list(devices, 0);
1048 // Sort the devices to get a consistent ordering.
// The comparator orders by product ID first, then by bus and port number.
1049 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1050 if (a.product != b.product)
1051 return a.product < b.product;
1053 return a.bus < b.bus;
1054 return a.port < b.port;
// Opens the card with the given index in the ordering produced by
// find_all_cards().
//
// Every card found is listed on stderr. The description of the requested card
// is stored in *description, and an out-of-range index is reported on stderr.
// After the open attempt, the device references obtained from
// find_all_cards() are released.
// NOTE(review): the statements following the error messages and the final
// return are not visible in this listing.
1060 libusb_device_handle *open_card(int card_index, string *description)
1062 vector<USBCardDevice> found_cards = find_all_cards();
1064 for (size_t i = 0; i < found_cards.size(); ++i) {
1065 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1066 fprintf(stderr, "%s\n", tmp_description.c_str());
1067 if (i == size_t(card_index)) {
1068 *description = tmp_description;
1072 if (size_t(card_index) >= found_cards.size()) {
1073 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1077 libusb_device_handle *devh;
1078 int rc = libusb_open(found_cards[card_index].device, &devh);
1080 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the references that find_all_cards() left on every listed device.
1084 for (size_t i = 0; i < found_cards.size(); ++i) {
1085 libusb_unref_device(found_cards[i].device);
// Opens a specific, already known device (e.g. one delivered through the
// hotplug callback) instead of searching by index.
//
// card_index is only used for the text stored in *description. This function
// does not unreference dev; configure_card() does that after calling it.
// NOTE(review): the statements following the error messages and the final
// return are not visible in this listing.
1091 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1093 uint8_t bus = libusb_get_bus_number(dev);
1094 uint8_t port = libusb_get_port_number(dev);
1096 libusb_device_descriptor desc;
1097 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1098 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1102 *description = get_card_description(card_index, bus, port, desc.idProduct);
1104 libusb_device_handle *devh;
1105 int rc = libusb_open(dev, &devh);
1107 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
// Counts the supported capture cards currently attached.
//
// Initializes the default libusb context itself, so it does not depend on
// configure_card() having run. The count is taken from find_all_cards(), and
// the device references obtained there are released again.
1116 unsigned BMUSBCapture::num_cards()
1118 int rc = libusb_init(nullptr);
1120 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1124 vector<USBCardDevice> found_cards = find_all_cards();
1125 unsigned ret = found_cards.size();
1126 for (size_t i = 0; i < found_cards.size(); ++i) {
1127 libusb_unref_device(found_cards[i].device);
// Selects the pixel format (8-bit or 10-bit YCbCr) and calls
// update_capture_mode() to apply the new setting.
1132 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1134 current_pixel_format = pixel_format;
1135 update_capture_mode();
// Opens the capture card and prepares it for streaming. The visible code, in
// order:
//  - installs internally owned malloc-based frame allocators if the user did
//    not provide any, and starts the dequeue thread;
//  - initializes libusb and opens the device (by index, or the explicit
//    device stored in dev);
//  - reads the configuration descriptor, selects configuration 1, claims
//    interfaces and switches alternate settings on interface 0;
//  - calls update_capture_mode() and issues vendor control transfers
//    (requests 214/215) against the card's registers;
//  - allocates the isochronous transfers for endpoints 3 and 4 and stores them
//    in iso_xfrs (start_bm_capture() submits them).
//
// NOTE(review): preprocessor lines (e.g. the #else/#endif belonging to the
// LIBUSB_API_VERSION checks below) and the error-path statements are not
// visible in this listing, so some of the sections below may be compiled out
// in the real file. Confirm against the full source.
1138 void BMUSBCapture::configure_card()
// Fall back to our own allocators when none have been set.
1140 if (video_frame_allocator == nullptr) {
1141 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1142 set_video_frame_allocator(owned_video_frame_allocator.get());
1144 if (audio_frame_allocator == nullptr) {
1145 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1146 set_audio_frame_allocator(owned_audio_frame_allocator.get());
// Start the dequeue thread; stop_dequeue_thread() stops and joins it.
1148 dequeue_thread_should_quit = false;
1149 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1152 struct libusb_transfer *xfr;
1154 rc = libusb_init(nullptr);
1156 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Without an explicit device, look the card up by index. Otherwise open the
// given device and drop our reference to it afterwards.
1160 if (dev == nullptr) {
1161 devh = open_card(card_index, &description);
1163 devh = open_card(card_index, dev, &description);
1164 libusb_unref_device(dev);
1167 fprintf(stderr, "Error finding USB device\n");
1171 libusb_config_descriptor *config;
1172 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1174 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Debug dump of every interface, alternate setting and endpoint address.
1179 printf("%d interface\n", config->bNumInterfaces);
1180 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1181 printf(" interface %d\n", interface_number);
1182 const libusb_interface *interface = &config->interface[interface_number];
1183 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1184 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1185 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1186 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1187 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1188 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1194 rc = libusb_set_configuration(devh, /*configuration=*/1);
1196 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1200 rc = libusb_claim_interface(devh, 0);
1202 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1206 // Alternate setting 1 is output, alternate setting 2 is input.
1207 // Card is reset when switching alternates, so the driver uses
1208 // this “double switch” when it wants to reset.
1210 // There's also alternate settings 3 and 4, which seem to be
1211 // like 1 and 2 except they advertise less bandwidth needed.
1212 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1214 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1215 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1216 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1217 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1218 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1222 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1224 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1228 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1230 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1236 rc = libusb_claim_interface(devh, 3);
1238 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's register file (accessed through
// vendor requests 214 = read, 215 = write, as used by the transfers below).
1244 // 44 is some kind of timer register (first 16 bits count upwards)
1245 // 24 is some sort of watchdog?
1246 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1247 // (or will go to 0x73c60010?), also seen 0x73c60100
1248 // 12 also changes all the time, unclear why
1249 // 16 seems to be autodetected mode somehow
1250 // -- this is e00115e0 after reset?
1251 // ed0115e0 after mode change [to output?]
1252 // 2d0015e0 after more mode change [to input]
1253 // ed0115e0 after more mode change
1254 // 2d0015e0 after more mode change
1256 // 390115e0 seems to indicate we have signal
1257 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1259 // 200015e0 on startup
1260 // changes to 250115e0 when we sync to the signal
1262 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1264 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1266 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1267 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1269 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1270 // perhaps some of them are related to analog output?
1272 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1273 // but the driver sets it to 0x8036802a at some point.
1275 // all of this is on request 214/215. other requests (192, 219,
1276 // 222, 223, 224) are used for firmware upgrade. Probably best to
1277 // stay out of it unless you know what you're doing.
1281 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1284 // 0x01 - stable signal
1285 // 0x04 - deep color
1286 // 0x08 - unknown (audio??)
// Push the currently selected inputs and pixel format to the card now that
// devh is valid.
1290 update_capture_mode();
// Table of synchronous control transfers issued right after opening the card.
// The initializers are used below as { endpoint direction, request, index, data }.
1298 static const ctrl ctrls[] = {
1299 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1300 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1302 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1303 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1304 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1305 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1308 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is converted with htonl() before being copied into the buffer.
1309 uint32_t flipped = htonl(ctrls[req].data);
1310 static uint8_t value[4];
1311 memcpy(value, &flipped, sizeof(flipped));
1312 int size = sizeof(value);
1313 //if (ctrls[req].request == 215) size = 0;
1314 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1315 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1317 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte transfer on register 16 carries the firmware version in its last two bytes.
1321 if (ctrls[req].index == 16 && rc == 4) {
1322 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1326 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1327 for (int i = 0; i < rc; ++i) {
1328 printf("%02x", value[i]);
// Synchronous read of the register selected by my_index, printed as hex.
1337 static int my_index = 0;
1338 static uint8_t value[4];
1339 int size = sizeof(value);
1340 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1341 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1343 fprintf(stderr, "Error on control\n");
1346 printf("rc=%d index=%d: 0x", rc, my_index);
1347 for (int i = 0; i < rc; ++i) {
1348 printf("%02x", value[i]);
// The asynchronous control transfers below use static command buffers and
// complete through cb_xfr with this object as user_data.
1355 // set up an asynchronous transfer of the timer register
1356 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1357 static int completed = 0;
1359 xfr = libusb_alloc_transfer(0);
1360 libusb_fill_control_setup(cmdbuf,
1361 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1362 /*index=*/44, /*length=*/4);
1363 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1364 xfr->user_data = this;
1365 libusb_submit_transfer(xfr);
1367 // set up an asynchronous transfer of register 24
1368 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1369 static int completed2 = 0;
1371 xfr = libusb_alloc_transfer(0);
1372 libusb_fill_control_setup(cmdbuf2,
1373 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1374 /*index=*/24, /*length=*/4);
1375 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1376 xfr->user_data = this;
1377 libusb_submit_transfer(xfr);
1380 // set up an asynchronous transfer of the register dump
1381 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1382 static int completed3 = 0;
1384 xfr = libusb_alloc_transfer(0);
1385 libusb_fill_control_setup(cmdbuf3,
1386 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1387 /*index=*/current_register, /*length=*/4);
1388 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1389 xfr->user_data = this;
// This transfer is prepared but its submission is commented out.
1390 //libusb_submit_transfer(xfr);
1392 //audiofp = fopen("audio.raw", "wb");
1394 // set up isochronous transfers for audio and video
// Six transfers are created for each of the IN endpoints 3 and 4.
// NOTE(review): the branch that distinguishes the two endpoints, and the
// sizes used for the second one, are not visible in this listing.
1395 for (int e = 3; e <= 4; ++e) {
1396 int num_transfers = 6;
1397 for (int i = 0; i < num_transfers; ++i) {
1399 int num_iso_pack, size;
1401 // Allocate for minimum width (because that will give us the most
1402 // number of packets, so we don't need to reallocate), but we'll
1403 // default to 720p for the first frame.
1404 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1405 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1406 buf_size = USB_VIDEO_TRANSFER_SIZE;
1410 buf_size = num_iso_pack * size;
1412 int num_bytes = num_iso_pack * size;
1413 assert(size_t(num_bytes) <= buf_size);
// With a new enough libusb, ask for persistent DMA memory. The other
// preprocessor branch leaves buf null so the fallback below is used.
1414 #if LIBUSB_API_VERSION >= 0x01000105
1415 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1417 uint8_t *buf = nullptr;
1419 if (buf == nullptr) {
1420 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1421 #if LIBUSB_API_VERSION >= 0x01000105
1422 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1424 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1426 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
// Fallback: ordinary heap buffer.
1427 buf = new uint8_t[num_bytes];
1430 xfr = libusb_alloc_transfer(num_iso_pack);
1432 fprintf(stderr, "oom\n");
1436 int ep = LIBUSB_ENDPOINT_IN | e;
1437 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1438 num_iso_pack, cb_xfr, nullptr, 0);
1439 libusb_set_iso_packet_lengths(xfr, size);
1440 xfr->user_data = this;
// Re-split the transfer into packets sized for the assumed frame width and
// the current pixel format (the enclosing condition is not visible here).
1443 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// Transfers are only queued here; start_bm_capture() submits them.
1446 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer that configure_card() stored in
// iso_xfrs, which starts the data flow from the card. A failed submission is
// reported on stderr with the endpoint, a counter and the libusb error name.
// NOTE(review): the declaration and update of the counter i, the statement
// after the error message, and the conditions under which the trailing
// release/exit calls run (or whether they are compiled at all) are not
// visible in this listing.
1451 void BMUSBCapture::start_bm_capture()
1454 for (libusb_transfer *xfr : iso_xfrs) {
1455 int rc = libusb_submit_transfer(xfr);
1458 //printf("num_bytes=%d\n", num_bytes);
1459 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1460 xfr->endpoint, i, libusb_error_name(rc));
1467 libusb_release_interface(devh, 0);
1471 libusb_exit(nullptr);
// Stops the dequeue thread started in configure_card(): sets its quit flag,
// wakes any waiter on queues_not_empty so the flag is noticed, and joins the
// thread.
1476 void BMUSBCapture::stop_dequeue_thread()
1478 dequeue_thread_should_quit = true;
1479 queues_not_empty.notify_all();
1480 dequeue_thread.join();
// Starts the USB event thread (usb_thread_func).
//
// If a card_connected_callback is installed, it first registers cb_hotplug
// for device-arrival events from any Blackmagic device. When
// hotplug_existing_devices is set, LIBUSB_HOTPLUG_ENUMERATE is passed so that
// already attached devices are reported as well.
// NOTE(review): the statement following the registration error message is not
// visible in this listing.
1483 void BMUSBCapture::start_bm_thread()
1485 // Devices leaving are discovered by seeing the isochronous packets
1486 // coming back with errors, so only care about devices joining.
1487 if (card_connected_callback != nullptr) {
1488 if (libusb_hotplug_register_callback(
1489 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1490 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1491 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1492 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
// Clear the quit flag before the thread starts polling it.
1497 should_quit = false;
1498 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread().
// NOTE(review): only the libusb wake-up call is visible in this listing; any
// handling of should_quit or joining of usb_thread is not shown.
1501 void BMUSBCapture::stop_bm_thread()
// Interrupt event handling on the default context so a thread blocked in
// libusb_handle_events_timeout() returns promptly.
1504 libusb_interrupt_event_handler(nullptr);
// Returns the selectable video modes. There is exactly one entry, ID 0, named
// "Autodetect" with its autodetect flag set.
1508 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1510 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1511 VideoMode auto_mode;
1512 auto_mode.name = "Autodetect";
1513 auto_mode.autodetect = true;
1514 return {{ 0, auto_mode }};
// Always reports mode ID 0, the single "Autodetect" entry.
1517 uint32_t BMUSBCapture::get_current_video_mode() const
1519 return 0; // Matches get_available_video_modes().
// Accepts only mode ID 0 (checked with an assert). The visible code changes
// no state.
1522 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1524 assert(video_mode_id == 0); // Matches get_available_video_modes().
// Lists the selectable video inputs. Each ID is the bit pattern that
// set_video_input() stores and update_capture_mode() ORs into the mode word.
1527 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1530 { 0x00000000, "HDMI/SDI" },
1531 { 0x02000000, "Component" },
1532 { 0x04000000, "Composite" },
1533 { 0x06000000, "S-video" }
// Selects the video input and calls update_capture_mode() to apply it.
// video_input_id must be one of the IDs from get_available_video_inputs().
1537 void BMUSBCapture::set_video_input(uint32_t video_input_id)
// Only the two input-select bits (mask 0x06000000) may be set.
1539 assert((video_input_id & ~0x06000000) == 0);
1540 current_video_input = video_input_id;
1541 update_capture_mode();
// Lists the selectable audio inputs. Each ID is the bit pattern that
// set_audio_input() stores and update_capture_mode() ORs into the mode word.
1544 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1547 { 0x00000000, "Embedded" },
1548 { 0x10000000, "Analog" }
// Selects the audio input and calls update_capture_mode() to apply it.
// audio_input_id must be one of the IDs from get_available_audio_inputs().
1552 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
// Only the single audio-select bit (0x10000000) may be set.
1554 assert((audio_input_id & ~0x10000000) == 0);
1555 current_audio_input = audio_input_id;
1556 update_capture_mode();
// Sends the current capture mode to the card as a 4-byte vendor OUT control
// transfer (request 215, index 0).
//
// The mode word is 0x09000000 combined with the selected video and audio input
// bits, plus 0x20000000 when the pixel format is 8-bit YCbCr. All parts go
// through htonl(), so the word is transmitted in network byte order.
// NOTE(review): the body of the devh == nullptr check and the statements
// around the error message are not visible in this listing; presumably the
// check is an early return for calls made before the card is opened. Confirm.
1559 void BMUSBCapture::update_capture_mode()
1561 if (devh == nullptr) {
1565 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
1566 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1567 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1568 mode |= htonl(0x20000000);
// Any format other than 8-bit must be the 10-bit one.
1570 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1573 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1574 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1576 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1581 } // namespace bmusb