1 // Intensity Shuttle USB3 capture driver, v0.7.2
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
// USB vendor ID that device descriptors are compared against when looking for cards.
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
// Number of bytes skipped at the start of each video frame (see dump_frame and
// the video offset handed to frame_callback).
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
// Number of bytes skipped at the start of each audio block.
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Callback invoked from cb_hotplug when a matching device is plugged in.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
// Polled by usb_thread_func; the USB event loop exits once this becomes true.
64 atomic<bool> should_quit;
// Returns the number of bytes in one line of v210-packed video of the given
// width: the width is rounded up to whole groups of six pixels, and each
// group occupies four 32-bit words.
int v210_stride(int width)
{
	const int groups_of_six = (width + 5) / 6;
	return groups_of_six * 4 * sizeof(uint32_t);
}
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
84 if (size % 1024 != 0) {
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
95 int num_iso_pack = xfr->length / size;
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the table of known video formats used by decode_video_format().
// Kept as a plain aggregate so that the table can be a constexpr array
// initialized with brace lists in member order.
struct VideoFormatEntry {
	uint16_t normalized_video_format;  // Format word with the flag bits masked off.
	unsigned width, height, second_field_start;
	unsigned extra_lines_top, extra_lines_bottom;
	unsigned frame_rate_nom, frame_rate_den;
	bool interlaced;
};
// Decodes the 16-bit format word reported by the card into *decoded_video_format.
// The structure is always filled in, with guessed values when the format word
// is not recognized. has_signal is cleared for the no-signal pseudo-format
// (0x0800) and for words whose top three bits (0xe000) are not all set.
111 // Get details for the given video format; returns false if detection was incomplete.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
// Anything without all of the three top bits set is not treated as a video format.
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
// With bit 0x0800 set the line is two bytes per pixel; otherwise it is v210-packed.
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask clears the 0xe000 marker bits, the 0x0800 bit and the 0x8/0x4 flags
// before the table lookup.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: normalized format, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
203 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
204 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
205 { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
206 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
207 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
208 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
209 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
210 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
211 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
212 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
213 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
214 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
215 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
216 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
// Linear search of the table; a matching row is copied field by field.
218 for (const VideoFormatEntry &entry : entries) {
219 if (normalized_video_format == entry.normalized_video_format) {
220 decoded_video_format->width = entry.width;
221 decoded_video_format->height = entry.height;
222 if (video_format & 0x0800) {
223 decoded_video_format->stride = entry.width * 2;
225 decoded_video_format->stride = v210_stride(entry.width);
227 decoded_video_format->second_field_start = entry.second_field_start;
228 decoded_video_format->extra_lines_top = entry.extra_lines_top;
229 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
230 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
231 decoded_video_format->frame_rate_den = entry.frame_rate_den;
232 decoded_video_format->interlaced = entry.interlaced;
// Fallback values for a format word that matched nothing above.
237 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
238 decoded_video_format->width = 1280;
239 decoded_video_format->height = 720;
240 decoded_video_format->stride = 1280 * 2;
241 decoded_video_format->frame_rate_nom = 60;
242 decoded_video_format->frame_rate_den = 1;
246 // There are seemingly no direct indicators of sample rate; you just get
247 // one frame's worth and have to guess from that.
248 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
250 size_t num_samples = len / 3 / 8;
251 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
253 // See if we match or are very close to any of the mandatory HDMI sample rates.
254 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
255 for (int rate : candidate_sample_rates) {
256 if (abs(int(num_samples_per_second) - rate) <= 100) {
261 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
262 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
268 FrameAllocator::~FrameAllocator() {}
270 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
271 : frame_size(frame_size)
273 for (size_t i = 0; i < num_queued_frames; ++i) {
274 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
278 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
283 unique_lock<mutex> lock(freelist_mutex); // Meh.
284 if (freelist.empty()) {
285 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
288 vf.data = freelist.top().release();
289 vf.size = frame_size;
290 freelist.pop(); // Meh.
295 void MallocFrameAllocator::release_frame(Frame frame)
297 if (frame.overflow > 0) {
298 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
300 unique_lock<mutex> lock(freelist_mutex);
301 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if a comes strictly before b on a 16-bit counter that wraps
// around, i.e. if stepping forward from a reaches b in fewer than 0x8000
// steps. Equal values are not "less than".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Distance from a forward to b, modulo 2^16.
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
316 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
318 unique_lock<mutex> lock(queue_lock);
319 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
320 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
321 q->back().timecode, timecode);
322 frame.owner->release_frame(frame);
328 qf.timecode = timecode;
330 q->push_back(move(qf));
331 queues_not_empty.notify_one(); // might be spurious
334 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
336 FILE *fp = fopen(filename, "wb");
337 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
338 printf("short write!\n");
343 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
345 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Waits until both a video and an audio frame are
// pending (or until asked to quit), compares the timecodes of the two queue
// heads, and hands frames to frame_callback: video that is older than the
// oldest audio is dropped, audio that is older than the oldest video is
// delivered with an empty video frame, and matching timecodes are delivered
// together.
348 void BMUSBCapture::dequeue_thread_func()
350 char thread_name[16];
351 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
352 pthread_setname_np(pthread_self(), thread_name);
354 if (has_dequeue_callbacks) {
355 dequeue_init_callback();
// Last successfully guessed sample rate; reused whenever no new guess is made.
357 size_t last_sample_rate = 48000;
358 while (!dequeue_thread_should_quit) {
359 unique_lock<mutex> lock(queue_lock);
360 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
362 if (dequeue_thread_should_quit) break;
364 uint16_t video_timecode = pending_video_frames.front().timecode;
365 uint16_t audio_timecode = pending_audio_frames.front().timecode;
366 AudioFormat audio_format;
367 audio_format.bits_per_sample = 24;
368 audio_format.num_channels = 8;
369 audio_format.sample_rate = last_sample_rate;
// Video head is older than the audio head: there is no audio to go with it.
370 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
371 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
373 QueuedFrame video_frame = pending_video_frames.front();
374 pending_video_frames.pop_front();
376 video_frame_allocator->release_frame(video_frame.frame);
// Audio head is older than the video head: deliver it with an empty video frame.
377 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
378 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
380 QueuedFrame audio_frame = pending_audio_frames.front();
381 pending_audio_frames.pop_front();
383 audio_format.id = audio_frame.format;
385 // Use the video format of the pending frame.
386 QueuedFrame video_frame = pending_video_frames.front();
387 VideoFormat video_format;
388 decode_video_format(video_frame.format, &video_format);
390 frame_callback(audio_timecode,
391 FrameAllocator::Frame(), 0, video_format,
392 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Neither head is older than the other: take both off their queues.
394 QueuedFrame video_frame = pending_video_frames.front();
395 QueuedFrame audio_frame = pending_audio_frames.front();
396 pending_audio_frames.pop_front();
397 pending_video_frames.pop_front();
402 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
403 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
404 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
407 VideoFormat video_format;
408 audio_format.id = audio_frame.format;
409 if (decode_video_format(video_frame.format, &video_format)) {
// Only re-guess the sample rate when there actually is audio data.
410 if (audio_frame.frame.len != 0) {
411 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
412 last_sample_rate = audio_format.sample_rate;
414 frame_callback(video_timecode,
415 video_frame.frame, HEADER_SIZE, video_format,
416 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Format could not be decoded: give the video buffer back and deliver audio only.
418 video_frame_allocator->release_frame(video_frame.frame);
419 audio_format.sample_rate = last_sample_rate;
420 frame_callback(video_timecode,
421 FrameAllocator::Frame(), 0, video_format,
422 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
426 if (has_dequeue_callbacks) {
427 dequeue_cleanup_callback();
// Called by decode_packs with a pointer to the bytes just after a video sync
// marker. Reads a little-endian timecode (bytes 0-1) and format (bytes 2-3)
// from there, queues the video frame collected so far (if it has any data)
// under that format and timecode, and allocates a fresh frame to collect into.
431 void BMUSBCapture::start_new_frame(const uint8_t *start)
433 uint16_t format = (start[3] << 8) | start[2];
434 uint16_t timecode = (start[1] << 8) | start[0];
436 if (current_video_frame.len > 0) {
437 current_video_frame.received_timestamp = steady_clock::now();
439 // If format is 0x0800 (no signal), add a fake (empty) audio
440 // frame to get it out of the queue.
441 // TODO: Figure out if there are other formats that come with
442 // no audio, and treat them the same.
443 if (format == 0x0800) {
444 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
445 if (fake_audio_frame.data == nullptr) {
446 // Oh well, it's just a no-signal frame anyway.
447 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
448 current_video_frame.owner->release_frame(current_video_frame);
449 current_video_frame = video_frame_allocator->alloc_frame();
452 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
455 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
457 // Update the assumed frame width. We might be one frame too late on format changes,
458 // but it's much better than asking the user to choose manually.
459 VideoFormat video_format;
460 if (decode_video_format(format, &video_format)) {
461 assumed_frame_width = video_format.width;
464 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
466 // //start[7], start[6], start[5], start[4],
467 // read_current_frame, FRAME_SIZE);
// Start collecting the next frame into a fresh buffer.
469 current_video_frame = video_frame_allocator->alloc_frame();
470 //if (current_video_frame.data == nullptr) {
471 // read_current_frame = -1;
473 // read_current_frame = 0;
// Audio counterpart of start_new_frame: reads a little-endian timecode
// (bytes 0-1) and format (bytes 2-3) from start, queues the audio collected so
// far (if any) under them, and allocates a fresh audio frame.
477 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
479 uint16_t format = (start[3] << 8) | start[2];
480 uint16_t timecode = (start[1] << 8) | start[0];
481 if (current_audio_frame.len > 0) {
482 current_audio_frame.received_timestamp = steady_clock::now();
483 //dump_audio_block();
484 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
486 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
487 // format, timecode);
488 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debugging aid: prints the received bytes of one isochronous packet as hex.
// offset is the position of the packet's data within xfr->buffer.
492 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
494 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
495 for (unsigned j = 0; j < pack->actual_length; j++) {
496 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
497 printf("%02x", xfr->buffer[j + offset]);
// Separator handling for every eighth byte.
500 else if ((j % 8) == 7)
// De-interleaving copy: bytes at even offsets in src go to dest1 and bytes at
// odd offsets go to dest2, so each destination receives n/2 bytes.
// n must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);
	uint8_t *dptr1 = dest1;
	uint8_t *dptr2 = dest2;

	for (size_t i = 0; i < n; i += 2) {
		*dptr1++ = *src++;
		*dptr2++ = *src++;
	}
}
// Slow-path append of the bytes in [start, end) to current_frame.
// frame_type_name is only used in the overflow message. Data that does not
// fit is counted in current_frame->overflow. The bytes are optionally
// mirrored into data_copy, and for interleaved frames they are split between
// data and data2 instead of being copied linearly.
520 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Sanity checks on the destination frame.
522 if (current_frame->data == nullptr ||
523 current_frame->len > current_frame->size ||
528 int bytes = end - start;
// The new data does not fit: record how many bytes were lost and mark the
// frame as full. Only overflows above 1 MiB are reported (and then reset).
529 if (current_frame->len + bytes > current_frame->size) {
530 current_frame->overflow = current_frame->len + bytes - current_frame->size;
531 current_frame->len = current_frame->size;
532 if (current_frame->overflow > 1048576) {
533 printf("%d bytes overflow after last %s frame\n",
534 int(current_frame->overflow), frame_type_name);
535 current_frame->overflow = 0;
// Optional linear copy of the raw data, kept alongside the (possibly interleaved) main copy.
539 if (current_frame->data_copy != nullptr) {
540 memcpy(current_frame->data_copy + current_frame->len, start, bytes);
542 if (current_frame->interleaved) {
// Each of the two planes holds half of the bytes received so far.
543 uint8_t *data = current_frame->data + current_frame->len / 2;
544 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length: special handling before the bulk copy.
545 if (current_frame->len % 2 == 1) {
// Odd number of bytes to add: special handling of one byte.
549 if (bytes % 2 == 1) {
552 ++current_frame->len;
555 memcpy_interleaved(data, data2, start, bytes);
556 current_frame->len += bytes;
// Non-interleaved frames are a plain linear append.
558 memcpy(current_frame->data + current_frame->len, start, bytes);
559 current_frame->len += bytes;
// Debugging aid: prints the name (left-aligned, padded to 10 characters)
// followed by all 32 bytes of an AVX2 register as hex. The extraction is
// spelled out once per byte index rather than looped.
565 void avx2_dump(const char *name, __m256i n)
567 printf("%-10s:", name);
568 printf(" %02x", _mm256_extract_epi8(n, 0));
569 printf(" %02x", _mm256_extract_epi8(n, 1));
570 printf(" %02x", _mm256_extract_epi8(n, 2));
571 printf(" %02x", _mm256_extract_epi8(n, 3));
572 printf(" %02x", _mm256_extract_epi8(n, 4));
573 printf(" %02x", _mm256_extract_epi8(n, 5));
574 printf(" %02x", _mm256_extract_epi8(n, 6));
575 printf(" %02x", _mm256_extract_epi8(n, 7));
577 printf(" %02x", _mm256_extract_epi8(n, 8));
578 printf(" %02x", _mm256_extract_epi8(n, 9));
579 printf(" %02x", _mm256_extract_epi8(n, 10));
580 printf(" %02x", _mm256_extract_epi8(n, 11));
581 printf(" %02x", _mm256_extract_epi8(n, 12));
582 printf(" %02x", _mm256_extract_epi8(n, 13));
583 printf(" %02x", _mm256_extract_epi8(n, 14));
584 printf(" %02x", _mm256_extract_epi8(n, 15));
586 printf(" %02x", _mm256_extract_epi8(n, 16));
587 printf(" %02x", _mm256_extract_epi8(n, 17));
588 printf(" %02x", _mm256_extract_epi8(n, 18));
589 printf(" %02x", _mm256_extract_epi8(n, 19));
590 printf(" %02x", _mm256_extract_epi8(n, 20));
591 printf(" %02x", _mm256_extract_epi8(n, 21));
592 printf(" %02x", _mm256_extract_epi8(n, 22));
593 printf(" %02x", _mm256_extract_epi8(n, 23));
595 printf(" %02x", _mm256_extract_epi8(n, 24));
596 printf(" %02x", _mm256_extract_epi8(n, 25));
597 printf(" %02x", _mm256_extract_epi8(n, 26));
598 printf(" %02x", _mm256_extract_epi8(n, 27));
599 printf(" %02x", _mm256_extract_epi8(n, 28));
600 printf(" %02x", _mm256_extract_epi8(n, 29));
601 printf(" %02x", _mm256_extract_epi8(n, 30));
602 printf(" %02x", _mm256_extract_epi8(n, 31));
607 #ifndef HAS_MULTIVERSIONING
609 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
611 // No fast path possible unless we have multiversioning.
615 #else // defined(HAS_MULTIVERSIONING)
617 __attribute__((target("sse4.1")))
618 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
620 __attribute__((target("avx2")))
621 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
623 // Does a memcpy and memchr in one to reduce processing time.
624 // Note that the benefit is somewhat limited if your L3 cache is small,
625 // as you'll (unfortunately) spend most of the time loading the data
628 // Complicated cases are left to the slow path; it basically stops copying
629 // up until the first instance of "sync_char" (usually a bit before, actually).
630 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
631 // data, and what we really need this for is the 00 00 ff ff marker in video data.
632 __attribute__((target("default")))
633 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
635 // No fast path possible unless we have SSE 4.1 or higher.
// Version of add_to_frame_fastpath for CPUs with SSE 4.1 or AVX2. It prepares
// the range for the vectorized inner loop: limit is clamped to the space left
// in the frame and rounded down to a 32-byte boundary, the unaligned head is
// copied via add_to_frame() (after a memchr for sync_char), and the aligned
// remainder is handed to add_to_frame_fastpath_core().
639 __attribute__((target("sse4.1", "avx2")))
640 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
642 if (current_frame->data == nullptr ||
643 current_frame->len > current_frame->size ||
647 size_t orig_bytes = limit - start;
// Short inputs (fewer than 128 bytes) get special handling.
648 if (orig_bytes < 128) {
653 // Don't read more bytes than we can write.
654 limit = min(limit, start + (current_frame->size - current_frame->len));
656 // Align end to 32 bytes.
657 limit = (const uint8_t *)(intptr_t(limit) & ~31);
659 if (start >= limit) {
663 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
664 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
665 if (aligned_start != start) {
// Look for the sync character in the unaligned head.
666 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
667 if (sync_start == nullptr) {
// No sync character in the head: copy all of it.
668 add_to_frame(current_frame, "", start, aligned_start);
// Sync character found: copy only the bytes before it.
670 add_to_frame(current_frame, "", start, sync_start);
675 // Make the length a multiple of 64.
676 if (current_frame->interleaved) {
677 if (((limit - aligned_start) % 64) != 0) {
680 assert(((limit - aligned_start) % 64) == 0);
683 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 inner loop of the fast path. Copies 32-byte blocks from aligned_start
// towards limit into the frame while comparing every byte against sync_char.
// Advances current_frame->len by the number of bytes copied and returns the
// input position it stopped at. Interleaved frames consume 64 input bytes per
// iteration and write 32 bytes to each of data and data2.
686 __attribute__((target("avx2")))
687 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char broadcast to all 32 byte lanes, for the comparisons below.
689 const __m256i needle = _mm256_set1_epi8(sync_char);
692 const __restrict __m256i *in = (const __m256i *)aligned_start;
693 if (current_frame->interleaved) {
694 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
695 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
696 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-indexed bytes go to the low half, odd-indexed bytes to the high half.
700 __m256i shuffle_cw = _mm256_set_epi8(
701 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
702 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
703 while (in < (const __m256i *)limit) {
704 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
705 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
706 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
708 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
709 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
710 __m256i found = _mm256_or_si256(found1, found2);
712 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
713 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
715 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
716 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// Gather the low 128-bit halves of both registers into lo and the high halves into hi.
718 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
719 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
721 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
722 _mm256_storeu_si256(out2, hi);
// True if any byte in either input block equalled sync_char.
724 if (!_mm256_testz_si256(found, found)) {
732 bytes_copied = (uint8_t *)in - aligned_start;
// Non-interleaved case: straight 32-byte block copy onto the end of the frame.
734 uint8_t *old_end = current_frame->data + current_frame->len;
735 __m256i *out = (__m256i *)old_end;
736 while (in < (const __m256i *)limit) {
737 __m256i data = _mm256_load_si256(in);
738 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
739 __m256i found = _mm256_cmpeq_epi8(data, needle);
740 if (!_mm256_testz_si256(found, found)) {
747 bytes_copied = (uint8_t *)out - old_end;
749 if (current_frame->data_copy != nullptr) {
750 // TODO: It would be somewhat more cache-efficient to write this in the
751 // same loop as above. However, it might not be worth the extra complexity.
752 memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
754 current_frame->len += bytes_copied;
756 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
757 return (const uint8_t *)in;
// SSE 4.1 inner loop of the fast path; same contract as the AVX2 version but
// working on 16-byte blocks. Interleaved frames consume 32 input bytes per
// iteration and write 16 bytes to each of data and data2.
760 __attribute__((target("sse4.1")))
761 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char broadcast to all 16 byte lanes, for the comparisons below.
763 const __m128i needle = _mm_set1_epi8(sync_char);
765 const __m128i *in = (const __m128i *)aligned_start;
767 if (current_frame->interleaved) {
768 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
769 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
770 if (current_frame->len % 2 == 1) {
// Keeps the low byte of every 16-bit word.
774 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
775 while (in < (const __m128i *)limit) {
776 __m128i data1 = _mm_load_si128(in);
777 __m128i data2 = _mm_load_si128(in + 1);
// Split every 16-bit word into its low byte (mask) and high byte (shift),
// then pack the two streams back down to bytes.
778 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
779 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
780 __m128i data1_hi = _mm_srli_epi16(data1, 8);
781 __m128i data2_hi = _mm_srli_epi16(data2, 8);
782 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
783 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
784 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
785 _mm_storeu_si128(out2, hi);
786 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
787 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
// True if any byte in either input block equalled sync_char.
788 if (!_mm_testz_si128(found1, found1) ||
789 !_mm_testz_si128(found2, found2)) {
797 bytes_copied = (uint8_t *)in - aligned_start;
// Non-interleaved case: straight 16-byte block copy onto the end of the frame.
799 uint8_t *old_end = current_frame->data + current_frame->len;
800 __m128i *out = (__m128i *)old_end;
801 while (in < (const __m128i *)limit) {
802 __m128i data = _mm_load_si128(in);
803 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
804 __m128i found = _mm_cmpeq_epi8(data, needle);
805 if (!_mm_testz_si128(found, found)) {
812 bytes_copied = (uint8_t *)out - old_end;
814 if (current_frame->data_copy != nullptr) {
815 // TODO: It would be somewhat more cache-efficient to write this in the
816 // same loop as above. However, it might not be worth the extra complexity.
817 memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
819 current_frame->len += bytes_copied;
821 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
822 return (const uint8_t *)in;
825 #endif // defined(HAS_MULTIVERSIONING)
// Walks all isochronous packets of a completed transfer and appends their
// payload to *current_frame. Whenever sync_pattern is found in the data, the
// bytes before it are added to the current frame and start_callback is called
// with a pointer just past the pattern. frame_type_name is only used for
// messages printed by add_to_frame().
827 void decode_packs(const libusb_transfer *xfr,
828 const char *sync_pattern,
830 FrameAllocator::Frame *current_frame,
831 const char *frame_type_name,
832 function<void(const uint8_t *start)> start_callback)
835 for (int i = 0; i < xfr->num_iso_packets; i++) {
836 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
838 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
839 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of the packet's slot in the buffer hold received data.
844 const uint8_t *start = xfr->buffer + offset;
845 const uint8_t *limit = start + pack->actual_length;
846 while (start < limit) { // Usually runs only one iteration.
// Try the vectorized copy first; it returns the position where it stopped.
847 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
848 if (start == limit) break;
849 assert(start < limit);
// Slow path: search the remainder for the complete sync pattern.
851 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
852 if (start_next_frame == nullptr) {
853 // add the rest of the buffer
854 add_to_frame(current_frame, frame_type_name, start, limit);
857 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
858 start = start_next_frame + sync_length; // skip sync
859 start_callback(start);
863 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer at their requested length, not the received length.
865 offset += pack->length;
// Completion callback for transfers. Failed transfers are reported and freed.
// A vanished device is flagged as disconnected (running
// card_disconnected_callback the first time). Isochronous transfers are
// decoded into audio (endpoint 0x84) or video frames, control transfers are
// stored into the register file, and the transfer is then submitted again.
869 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
871 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
872 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
873 fprintf(stderr, "error: transfer status %d\n", xfr->status);
874 libusb_free_transfer(xfr);
// user_data carries the BMUSBCapture instance this transfer belongs to.
878 assert(xfr->user_data != nullptr);
879 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
881 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
// Several transfers can report the missing device; only react to the first.
882 if (!usb->disconnected) {
883 fprintf(stderr, "Device went away, stopping transfers.\n");
884 usb->disconnected = true;
885 if (usb->card_disconnected_callback) {
886 usb->card_disconnected_callback();
889 // Don't reschedule the transfer; the loop will stop by itself.
893 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio; other isochronous transfers are decoded as video.
894 if (xfr->endpoint == 0x84) {
895 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
897 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
899 // Update the transfer with the new assumed width, if we're in the process of changing formats.
900 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
903 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
904 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
905 uint8_t *buf = libusb_control_transfer_get_data(xfr);
907 if (setup->wIndex == 44) {
908 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
910 printf("read register %2d: 0x%02x%02x%02x%02x\n",
911 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and step to the next register, wrapping at the end.
914 memcpy(usb->register_file + usb->current_register, buf, 4);
915 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
916 if (usb->current_register == 0) {
917 // read through all of them
918 printf("register dump:");
919 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
920 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the control transfer to read the next register.
924 libusb_fill_control_setup(xfr->buffer,
925 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
926 /*index=*/usb->current_register, /*length=*/4);
931 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
932 for (i = 0; i < xfr->actual_length; i++) {
933 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so that data keeps flowing.
943 int rc = libusb_submit_transfer(xfr);
945 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// Hotplug callback. If a card_connected_callback is registered and the device
// is a Blackmagic product with ID 0xbd3b or 0xbd4f, the device is passed to
// that callback, which takes over the reference. Otherwise the reference on
// dev is dropped here.
950 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
952 if (card_connected_callback != nullptr) {
953 libusb_device_descriptor desc;
954 if (libusb_get_device_descriptor(dev, &desc) < 0) {
955 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
956 libusb_unref_device(dev);
// Same product IDs as find_all_cards() accepts.
960 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
961 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
962 card_connected_callback(dev); // Callback takes ownership.
966 libusb_unref_device(dev);
// Body of the USB event thread (start_bm_thread() launches it).
// Tries to switch the calling thread to SCHED_RR with priority 1; a failure is
// only reported. It then names the thread "bmusb_usb_drv" and pumps libusb
// events on the default context until should_quit becomes true.
970 void BMUSBCapture::usb_thread_func()
973 memset(&param, 0, sizeof(param));
974 param.sched_priority = 1;
// A pid of 0 addresses the calling thread.
975 if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
976 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
978 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
979 while (!should_quit) {
// One-second timeout, so should_quit is re-checked even when no events arrive.
980 timeval sec { 1, 0 };
981 int rc = libusb_handle_events_timeout(nullptr, &sec);
// NOTE(review): the statement executed when libusb reports an error is not
// visible in this excerpt.
982 if (rc != LIBUSB_SUCCESS)
// One detected capture card, as collected by find_all_cards().
// Only the `device` member is visible in this excerpt; code elsewhere in this
// file also reads members named product, bus and port.
989 struct USBCardDevice {
// libusb device for this card; users of find_all_cards() in this file unref
// it once they are done with the list.
992 libusb_device *device;
// Maps a USB product ID to a human-readable product name.
// 0xbd3b -> "Intensity Shuttle", 0xbd4f -> "UltraStudio SDI".
// NOTE(review): what happens for any other product ID is not visible in this
// excerpt.
995 const char *get_product_name(uint16_t product)
997 if (product == 0xbd3b) {
998 return "Intensity Shuttle";
999 } else if (product == 0xbd4f) {
1000 return "UltraStudio SDI";
// Builds the one-line description of a card:
// "USB card <id>: Bus <bus> Device <port> <product name>".
// NOTE(review): the number printed after "Device" is the `port` argument, and
// the callers in this file pass libusb_get_port_number() for it, so the label
// may not be a device address -- confirm the intended wording.
1007 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
1009 const char *product_name = get_product_name(product);
1012 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
1013 id, bus, port, product_name);
// Enumerates all USB devices on the default libusb context and collects those
// with the Blackmagic vendor ID and product ID 0xbd3b or 0xbd4f, sorted into a
// stable order. Each returned entry still holds a device reference; the
// callers in this file release it with libusb_unref_device().
1017 vector<USBCardDevice> find_all_cards()
1019 libusb_device **devices;
1020 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
// NOTE(review): libusb_get_device_list() is documented to return a negative
// libusb_error code on failure, not necessarily -1; only -1 is tested here.
1021 if (num_devices == -1) {
1022 fprintf(stderr, "Error finding USB devices\n");
1025 vector<USBCardDevice> found_cards;
1026 for (ssize_t i = 0; i < num_devices; ++i) {
1027 libusb_device_descriptor desc;
1028 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1029 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1033 uint8_t bus = libusb_get_bus_number(devices[i]);
1034 uint8_t port = libusb_get_port_number(devices[i]);
// Devices that are not one of the two supported products are released here.
1036 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1037 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1038 libusb_unref_device(devices[i]);
1042 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free only the list itself and do not unref its devices.
1044 libusb_free_device_list(devices, 0);
1046 // Sort the devices to get a consistent ordering.
// Ordering keys as visible: product, then bus, then port. The condition
// guarding the bus comparison is elided in this excerpt.
1047 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1048 if (a.product != b.product)
1049 return a.product < b.product;
1051 return a.bus < b.bus;
1052 return a.port < b.port;
// Opens the card at position card_index in the list from find_all_cards().
// Every detected card's description is printed to stderr; the description of
// the selected one is also stored in *description.
// NOTE(review): the returns and error exits are not visible in this excerpt.
1058 libusb_device_handle *open_card(int card_index, string *description)
1060 vector<USBCardDevice> found_cards = find_all_cards();
1062 for (size_t i = 0; i < found_cards.size(); ++i) {
1063 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1064 fprintf(stderr, "%s\n", tmp_description.c_str());
1065 if (i == size_t(card_index)) {
1066 *description = tmp_description;
// The size_t cast makes a negative card_index compare as out of range too.
1070 if (size_t(card_index) >= found_cards.size()) {
1071 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1075 libusb_device_handle *devh;
1076 int rc = libusb_open(found_cards[card_index].device, &devh);
1078 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the references taken by find_all_cards() for every listed device.
1082 for (size_t i = 0; i < found_cards.size(); ++i) {
1083 libusb_unref_device(found_cards[i].device);
// Opens an already-known libusb device (for example one delivered through the
// hotplug path) and stores its description in *description. card_index is
// only used for the description text.
// NOTE(review): the returns and error exits are not visible in this excerpt.
1089 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1091 uint8_t bus = libusb_get_bus_number(dev);
1092 uint8_t port = libusb_get_port_number(dev);
1094 libusb_device_descriptor desc;
1095 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1096 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1100 *description = get_card_description(card_index, bus, port, desc.idProduct);
1102 libusb_device_handle *devh;
1103 int rc = libusb_open(dev, &devh);
1105 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
// Counts the supported cards currently attached.
// Initializes the default libusb context, enumerates with find_all_cards(),
// and releases the device references again; only the count is kept.
1114 unsigned BMUSBCapture::num_cards()
1116 int rc = libusb_init(nullptr);
1118 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1122 vector<USBCardDevice> found_cards = find_all_cards();
1123 unsigned ret = found_cards.size();
1124 for (size_t i = 0; i < found_cards.size(); ++i) {
1125 libusb_unref_device(found_cards[i].device);
// Stores the requested pixel format and calls update_capture_mode(), which
// turns the stored settings into the mode word for the card.
1130 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1132 current_pixel_format = pixel_format;
1133 update_capture_mode();
// Brings the card into a state where capture can be started. Visible steps,
// in order: install default frame allocators if none were set, start the
// dequeue thread, initialize libusb and open the device, print the
// interface/endpoint layout, select configuration 1 and claim interfaces 0
// and 3 (cycling interface 0 through alternate settings 1, 2, 1), push the
// capture mode, run a few vendor control transfers, and finally prepare the
// isochronous transfers for endpoints 3 and 4 and store them in iso_xfrs.
// NOTE(review): many lines (braces, error checks, preprocessor guards) are not
// visible in this excerpt; the comments below describe only what is shown.
1136 void BMUSBCapture::configure_card()
// Fall back to internally owned allocators when the user supplied none.
1138 if (video_frame_allocator == nullptr) {
1139 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1140 set_video_frame_allocator(owned_video_frame_allocator.get());
1142 if (audio_frame_allocator == nullptr) {
1143 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1144 set_audio_frame_allocator(owned_audio_frame_allocator.get());
1146 dequeue_thread_should_quit = false;
1147 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1150 struct libusb_transfer *xfr;
1152 rc = libusb_init(nullptr);
1154 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// With no libusb_device supplied, look the card up by index; otherwise open
// the supplied device and then unref it.
1158 if (dev == nullptr) {
1159 devh = open_card(card_index, &description);
1161 devh = open_card(card_index, dev, &description);
1162 libusb_unref_device(dev);
1165 fprintf(stderr, "Error finding USB device\n");
1169 libusb_config_descriptor *config;
1170 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1172 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Debug listing of every interface, alternate setting and endpoint address.
1177 printf("%d interface\n", config->bNumInterfaces);
1178 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1179 printf(" interface %d\n", interface_number);
1180 const libusb_interface *interface = &config->interface[interface_number];
1181 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1182 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1183 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1184 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1185 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1186 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1192 rc = libusb_set_configuration(devh, /*configuration=*/1);
1194 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1198 rc = libusb_claim_interface(devh, 0);
1200 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1204 // Alternate setting 1 is output, alternate setting 2 is input.
1205 // Card is reset when switching alternates, so the driver uses
1206 // this “double switch” when it wants to reset.
1208 // There's also alternate settings 3 and 4, which seem to be
1209 // like 1 and 2 except they advertise less bandwidth needed.
1210 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1212 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1213 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1214 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1215 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1216 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1220 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1222 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1226 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1228 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1234 rc = libusb_claim_interface(devh, 3);
1236 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
1242 // 44 is some kind of timer register (first 16 bits count upwards)
1243 // 24 is some sort of watchdog?
1244 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1245 // (or will go to 0x73c60010?), also seen 0x73c60100
1246 // 12 also changes all the time, unclear why
1247 // 16 seems to be autodetected mode somehow
1248 // -- this is e00115e0 after reset?
1249 // ed0115e0 after mode change [to output?]
1250 // 2d0015e0 after more mode change [to input]
1251 // ed0115e0 after more mode change
1252 // 2d0015e0 after more mode change
1254 // 390115e0 seems to indicate we have signal
1255 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1257 // 200015e0 on startup
1258 // changes to 250115e0 when we sync to the signal
1260 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1262 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1264 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1265 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1267 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1268 // perhaps some of them are related to analog output?
1270 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1271 // but the driver sets it to 0x8036802a at some point.
1273 // all of this is on request 214/215. other requests (192, 219,
1274 // 222, 223, 224) are used for firmware upgrade. Probably best to
1275 // stay out of it unless you know what you're doing.
1279 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1282 // 0x01 - stable signal
1283 // 0x04 - deep color
1284 // 0x08 - unknown (audio??)
// Send the currently selected input/pixel-format mode word to the card.
1288 update_capture_mode();
// Synchronous vendor control transfers issued once during setup: the reads use
// request 214, the write uses request 215.
1296 static const ctrl ctrls[] = {
1297 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1298 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1300 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1301 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1302 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1303 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1306 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The 32-bit payload is converted to network byte order before being sent.
1307 uint32_t flipped = htonl(ctrls[req].data);
1308 static uint8_t value[4];
1309 memcpy(value, &flipped, sizeof(flipped));
1310 int size = sizeof(value);
1311 //if (ctrls[req].request == 215) size = 0;
// A timeout of 0 means libusb waits without a time limit.
1312 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1313 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1315 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte read of register 16 carries the firmware version in its
// last two bytes.
1319 if (ctrls[req].index == 16 && rc == 4) {
1320 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1324 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1325 for (int i = 0; i < rc; ++i) {
1326 printf("%02x", value[i]);
// NOTE(review): single-register read through the static my_index; whatever
// guards or loops around this section is not visible in this excerpt.
1335 static int my_index = 0;
1336 static uint8_t value[4];
1337 int size = sizeof(value);
1338 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1339 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1341 fprintf(stderr, "Error on control\n");
1344 printf("rc=%d index=%d: 0x", rc, my_index);
1345 for (int i = 0; i < rc; ++i) {
1346 printf("%02x", value[i]);
// NOTE(review): whether the three asynchronous register reads below are
// compiled in cannot be determined from the visible lines.
1353 // set up an asynchronous transfer of the timer register
1354 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1355 static int completed = 0;
1357 xfr = libusb_alloc_transfer(0);
1358 libusb_fill_control_setup(cmdbuf,
1359 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1360 /*index=*/44, /*length=*/4);
1361 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// user_data is overwritten with `this` right after the fill call.
1362 xfr->user_data = this;
1363 libusb_submit_transfer(xfr);
1365 // set up an asynchronous transfer of register 24
1366 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1367 static int completed2 = 0;
1369 xfr = libusb_alloc_transfer(0);
1370 libusb_fill_control_setup(cmdbuf2,
1371 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1372 /*index=*/24, /*length=*/4);
1373 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1374 xfr->user_data = this;
1375 libusb_submit_transfer(xfr);
1378 // set up an asynchronous transfer of the register dump
1379 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1380 static int completed3 = 0;
1382 xfr = libusb_alloc_transfer(0);
1383 libusb_fill_control_setup(cmdbuf3,
1384 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1385 /*index=*/current_register, /*length=*/4);
1386 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1387 xfr->user_data = this;
// Unlike the two transfers above, this one is prepared but not submitted.
1388 //libusb_submit_transfer(xfr);
1390 //audiofp = fopen("audio.raw", "wb");
1392 // set up isochronous transfers for audio and video
// e is the endpoint number; it is OR'ed with LIBUSB_ENDPOINT_IN further down.
1393 for (int e = 3; e <= 4; ++e) {
1394 int num_transfers = 6;
1395 for (int i = 0; i < num_transfers; ++i) {
1397 int num_iso_pack, size;
// NOTE(review): buf_size is assigned in two places below; the condition that
// chooses between them is elided in this excerpt.
1399 // Allocate for minimum width (because that will give us the most
1400 // number of packets, so we don't need to reallocate, but we'll
1401 // default to 720p for the first frame).
1402 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1403 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1404 buf_size = USB_VIDEO_TRANSFER_SIZE;
1408 buf_size = num_iso_pack * size;
1410 int num_bytes = num_iso_pack * size;
1411 assert(size_t(num_bytes) <= buf_size);
// Ask libusb for device DMA memory when the libusb API is new enough.
1412 #if LIBUSB_API_VERSION >= 0x01000105
1413 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1415 uint8_t *buf = nullptr;
1417 if (buf == nullptr) {
1418 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1419 #if LIBUSB_API_VERSION >= 0x01000105
1420 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1422 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1424 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
// Fall back to ordinary heap memory for the transfer buffer.
1425 buf = new uint8_t[num_bytes];
1428 xfr = libusb_alloc_transfer(num_iso_pack);
1430 fprintf(stderr, "oom\n");
1434 int ep = LIBUSB_ENDPOINT_IN | e;
1435 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1436 num_iso_pack, cb_xfr, nullptr, 0);
1437 libusb_set_iso_packet_lengths(xfr, size);
1438 xfr->user_data = this;
// Re-slice the transfer's packets for the assumed width and pixel format.
// NOTE(review): whether this applies to both endpoints is not visible here.
1441 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// The transfer is only stored here; start_bm_capture() submits it.
1444 iso_xfrs.push_back(xfr);
// Submits every transfer stored in iso_xfrs to libusb.
// NOTE(review): `i` in the error message is declared on a line not visible
// here, and the context of the release/exit calls at the end (error path or
// disabled code) cannot be determined from this excerpt.
1449 void BMUSBCapture::start_bm_capture()
1452 for (libusb_transfer *xfr : iso_xfrs) {
1453 int rc = libusb_submit_transfer(xfr);
1456 //printf("num_bytes=%d\n", num_bytes);
1457 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1458 xfr->endpoint, i, libusb_error_name(rc));
1465 libusb_release_interface(devh, 0);
1469 libusb_exit(nullptr);
// Stops the dequeue thread started in configure_card(): raises the quit flag,
// wakes any waiter on queues_not_empty, and blocks until the thread has ended.
1474 void BMUSBCapture::stop_dequeue_thread()
1476 dequeue_thread_should_quit = true;
1477 queues_not_empty.notify_all();
1478 dequeue_thread.join();
// Starts the USB event thread. When a card_connected_callback is installed it
// first registers cb_hotplug with libusb for device-arrival events on the
// Blackmagic vendor ID (any product ID, any device class).
1481 void BMUSBCapture::start_bm_thread()
1483 // Devices leaving are discovered by seeing the isochronous packets
1484 // coming back with errors, so only care about devices joining.
1485 if (card_connected_callback != nullptr) {
// hotplug_existing_devices selects LIBUSB_HOTPLUG_ENUMERATE, which makes
// libusb also report devices that are already attached.
1486 if (libusb_hotplug_register_callback(
1487 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1488 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1489 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1490 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
// Clear the quit flag before the thread starts reading it.
1495 should_quit = false;
1496 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart to start_bm_thread(). The only statement visible here interrupts
// libusb's event handling on the default context so that the event loop in
// usb_thread_func() wakes up.
// NOTE(review): setting should_quit and joining usb_thread presumably happen
// on lines elided from this excerpt -- confirm.
1499 void BMUSBCapture::stop_bm_thread()
1502 libusb_interrupt_event_handler(nullptr);
// Returns the selectable video modes: a single entry with id 0, named
// "Autodetect" and flagged as autodetect.
1506 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1508 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1509 VideoMode auto_mode;
1510 auto_mode.name = "Autodetect";
1511 auto_mode.autodetect = true;
1512 return {{ 0, auto_mode }};
// Always reports mode id 0, the only id get_available_video_modes() offers.
1515 uint32_t BMUSBCapture::get_current_video_mode() const
1517 return 0; // Matches get_available_video_modes().
// Accepts only mode id 0 (checked with assert); no other statement is visible
// in this excerpt.
1520 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1522 assert(video_mode_id == 0); // Matches get_available_video_modes().
// Lists the selectable video inputs. Each id is the bit pattern that
// update_capture_mode() ORs into the mode word, so all ids lie within
// 0x06000000 (the mask set_video_input() checks).
1525 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1528 { 0x00000000, "HDMI/SDI" },
1529 { 0x02000000, "Component" },
1530 { 0x04000000, "Composite" },
1531 { 0x06000000, "S-video" }
// Selects the video input and calls update_capture_mode() to apply it.
// video_input_id must be one of the ids from get_available_video_inputs();
// the assert rejects any bit outside 0x06000000.
1535 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1537 assert((video_input_id & ~0x06000000) == 0);
1538 current_video_input = video_input_id;
1539 update_capture_mode();
// Lists the selectable audio inputs. Each id is the bit pattern that
// update_capture_mode() ORs into the mode word (bit 0x10000000 set for analog).
1542 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1545 { 0x00000000, "Embedded" },
1546 { 0x10000000, "Analog" }
// Selects the audio input and calls update_capture_mode() to apply it.
// audio_input_id must be one of the ids from get_available_audio_inputs();
// the assert rejects any bit outside 0x10000000.
1550 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1552 assert((audio_input_id & ~0x10000000) == 0);
1553 current_audio_input = audio_input_id;
1554 update_capture_mode();
// Builds the 32-bit mode word from the current video input, audio input and
// pixel format, and writes it to the card with a vendor OUT control transfer
// (request 215, index 0).
1557 void BMUSBCapture::update_capture_mode()
// NOTE(review): the body of this check is elided; presumably nothing is sent
// while no device handle is open -- confirm.
1559 if (devh == nullptr) {
1563 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
// The word is kept in network byte order, so later bits are OR'ed in through
// htonl() as well.
1564 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1565 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1566 mode |= htonl(0x20000000);
1568 assert(current_pixel_format == PixelFormat_10BitYCbCr);
// A timeout of 0 means libusb waits without a time limit.
1571 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1572 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1574 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1579 } // namespace bmusb