1 // Intensity Shuttle USB3 capture driver, v0.6.0
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
64 atomic<bool> should_quit;
// Byte length of one line of `width` pixels in the packed 10-bit layout:
// every group of six pixels (rounded up) takes four 32-bit words.
66 int v210_stride(int width)
68 return (width + 5) / 6 * 4 * sizeof(uint32_t);
// Computes the isochronous packet size to use for a given pixel format and
// frame width: six lines' worth of bytes (v210 stride for 10-bit input,
// two bytes per pixel otherwise), with a check for 1024-byte alignment.
// NOTE(review): the rounding statements and the return are on lines that are
// not present in this listing.
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
84 if (size % 1024 != 0) {
// Re-partitions an isochronous transfer for a new frame width. Computes the
// per-packet size for (pixel_format, width), derives how many such packets fit
// in xfr->length, and touches the transfer only if the packet count or the
// first packet's length differs from the computed values.
// Asserts that width >= MIN_WIDTH.
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
95 int num_iso_pack = xfr->length / size;
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the format lookup table used by decode_video_format(): the
// normalized format word plus the geometry and frame rate it maps to.
// NOTE(review): the table initializers supply a trailing bool and the lookup
// reads entry.interlaced, so an `interlaced` member is presumably declared on
// a line missing from this listing.
103 struct VideoFormatEntry {
104 uint16_t normalized_video_format;
105 unsigned width, height, second_field_start;
106 unsigned extra_lines_top, extra_lines_bottom;
107 unsigned frame_rate_nom, frame_rate_den;
111 // Get details for the given video format; returns false if detection was incomplete.
// The checks run in this order: 0x0800 (no signal), words lacking the 0xe000
// bits (not a video format), the NTSC and PAL special cases, and finally a
// table lookup on the word with the 0xe80c bits cleared. Anything still
// unmatched is reported and filled in as 720p60.
// NOTE(review): the return statements are on lines missing from this listing.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
// The 0x0800 bit is ignored when matching, but is consulted right after to
// pick between the two-bytes-per-pixel stride and the v210 stride.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Table columns: normalized format, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den,
// interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
203 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
204 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
205 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
206 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
207 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
208 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
209 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
210 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
211 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
212 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
213 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
214 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
216 for (const VideoFormatEntry &entry : entries) {
217 if (normalized_video_format == entry.normalized_video_format) {
218 decoded_video_format->width = entry.width;
219 decoded_video_format->height = entry.height;
220 if (video_format & 0x0800) {
221 decoded_video_format->stride = entry.width * 2;
223 decoded_video_format->stride = v210_stride(entry.width);
225 decoded_video_format->second_field_start = entry.second_field_start;
226 decoded_video_format->extra_lines_top = entry.extra_lines_top;
227 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
228 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
229 decoded_video_format->frame_rate_den = entry.frame_rate_den;
230 decoded_video_format->interlaced = entry.interlaced;
// Fallback for words that matched nothing above. Note that the stride set
// here is always two bytes per pixel, whatever the 0x0800 bit says.
235 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
236 decoded_video_format->width = 1280;
237 decoded_video_format->height = 720;
238 decoded_video_format->stride = 1280 * 2;
239 decoded_video_format->frame_rate_nom = 60;
240 decoded_video_format->frame_rate_den = 1;
244 // There are seemingly no direct indicators of sample rate; you just get
245 // one frame's worth and have to guess from that.
246 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
248 size_t num_samples = len / 3 / 8;
249 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
251 // See if we match or are very close to any of the mandatory HDMI sample rates.
252 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
253 for (int rate : candidate_sample_rates) {
254 if (abs(int(num_samples_per_second) - rate) < 50) {
259 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
260 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
// Out-of-line definition of the (empty) FrameAllocator destructor.
266 FrameAllocator::~FrameAllocator() {}
// Pre-allocates num_queued_frames buffers of frame_size bytes each and puts
// them all on the freelist.
268 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
269 : frame_size(frame_size)
271 for (size_t i = 0; i < num_queued_frames; ++i) {
272 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out a buffer from the freelist while holding freelist_mutex. The
// buffer's ownership is released from its unique_ptr into vf.data, and
// vf.size is set to frame_size. An empty freelist is logged as a frame overrun.
// NOTE(review): the overrun message uses %ld; if frame_size is a size_t (as
// the constructor parameter is), %zu would be the portable specifier.
276 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
281 unique_lock<mutex> lock(freelist_mutex); // Meh.
282 if (freelist.empty()) {
283 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
286 vf.data = freelist.top().release();
287 vf.size = frame_size;
288 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist: logs any overflow recorded on the
// frame, then re-wraps frame.data in a unique_ptr and pushes it under
// freelist_mutex.
293 void MallocFrameAllocator::release_frame(Frame frame)
295 if (frame.overflow > 0) {
296 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
298 unique_lock<mutex> lock(freelist_mutex);
299 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if `a` comes strictly before `b` when 16-bit counters are
// compared modulo 2^16: that is, when stepping forward from a reaches b in
// fewer than 0x8000 steps. Equal values are never "less than".
//
// Examples: (1, 2) -> true, (0xffff, 0) -> true (wrapped), (2, 1) -> false.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	}
	// Forward distance from a to b, reduced modulo 2^16. For a < b this is
	// b - a; for a > b it is 0x10000 + b - a.
	const uint16_t forward_distance = static_cast<uint16_t>(b - a);
	return forward_distance < 0x8000;
}
// Adds a frame to queue *q under queue_lock and signals queues_not_empty.
// If the timecode at the back of the queue is not strictly before `timecode`
// (wraparound-aware comparison), the frame is reported as going backwards and
// released back to its owner.
// NOTE(review): the early return after the release and the setup of the other
// QueuedFrame fields are presumably on lines missing from this listing.
314 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
316 unique_lock<mutex> lock(queue_lock);
317 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
318 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
319 q->back().timecode, timecode);
320 frame.owner->release_frame(frame);
326 qf.timecode = timecode;
328 q->push_back(move(qf));
329 queues_not_empty.notify_one(); // might be spurious
332 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
334 FILE *fp = fopen(filename, "wb");
335 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
336 printf("short write!\n");
// Debug helper: appends the payload of one audio block (everything after the
// AUDIO_HEADER_SIZE-byte header) to the audiofp stream.
// NOTE(review): audio_len is assumed to be at least AUDIO_HEADER_SIZE;
// a shorter block would make the subtraction wrap around.
341 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
343 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. It names the thread, runs dequeue_init_callback
// (when has_dequeue_callbacks is set), and then loops until
// dequeue_thread_should_quit: it waits until both the video and the audio
// queue are non-empty, compares the timecodes at the two fronts, and either
// drops an unmatched video frame, delivers an unmatched audio frame with an
// empty video frame, or delivers the matching pair through frame_callback.
// dequeue_cleanup_callback runs on the way out.
// NOTE(review): some lines of this function are missing from the listing
// (e.g. between the pops and the callbacks), so locking around the callbacks
// cannot be judged from here.
346 void BMUSBCapture::dequeue_thread_func()
348 char thread_name[16];
349 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
350 pthread_setname_np(pthread_self(), thread_name);
352 if (has_dequeue_callbacks) {
353 dequeue_init_callback();
// Sample rate reported until a frame yields a better guess.
355 size_t last_sample_rate = 48000;
356 while (!dequeue_thread_should_quit) {
357 unique_lock<mutex> lock(queue_lock);
358 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
360 if (dequeue_thread_should_quit) break;
362 uint16_t video_timecode = pending_video_frames.front().timecode;
363 uint16_t audio_timecode = pending_audio_frames.front().timecode;
364 AudioFormat audio_format;
365 audio_format.bits_per_sample = 24;
366 audio_format.num_channels = 8;
367 audio_format.sample_rate = last_sample_rate;
// Video is older than the oldest audio: nothing to pair it with.
368 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
369 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
371 QueuedFrame video_frame = pending_video_frames.front();
372 pending_video_frames.pop_front();
374 video_frame_allocator->release_frame(video_frame.frame);
// Audio is older than the oldest video: deliver it with an empty video frame.
375 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
376 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
378 QueuedFrame audio_frame = pending_audio_frames.front();
379 pending_audio_frames.pop_front();
381 audio_format.id = audio_frame.format;
383 // Use the video format of the pending frame.
384 QueuedFrame video_frame = pending_video_frames.front();
385 VideoFormat video_format;
386 decode_video_format(video_frame.format, &video_format);
388 frame_callback(audio_timecode,
389 FrameAllocator::Frame(), 0, video_format,
390 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Timecodes match: take both fronts as a pair.
392 QueuedFrame video_frame = pending_video_frames.front();
393 QueuedFrame audio_frame = pending_audio_frames.front();
394 pending_audio_frames.pop_front();
395 pending_video_frames.pop_front();
400 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
401 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
402 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
405 VideoFormat video_format;
406 audio_format.id = audio_frame.format;
407 if (decode_video_format(video_frame.format, &video_format)) {
408 if (audio_frame.frame.len != 0) {
409 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
410 last_sample_rate = audio_format.sample_rate;
412 frame_callback(video_timecode,
413 video_frame.frame, HEADER_SIZE, video_format,
414 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Format could not be decoded: give the video buffer back and deliver the
// audio alone, with the last known sample rate.
416 video_frame_allocator->release_frame(video_frame.frame);
417 audio_format.sample_rate = last_sample_rate;
418 frame_callback(video_timecode,
419 FrameAllocator::Frame(), 0, video_format,
420 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
424 if (has_dequeue_callbacks) {
425 dequeue_cleanup_callback();
// Called by the pack decoder with a pointer to the bytes that follow a video
// sync marker. Reads the timecode from bytes 0-1 and the format from bytes
// 2-3 (both little-endian). If the frame collected so far has any data, it is
// timestamped and queued, preceded by a fake audio frame when the format is
// the no-signal word 0x0800. The assumed frame width is then refreshed from
// the decoded format, and a new current_video_frame is allocated.
429 void BMUSBCapture::start_new_frame(const uint8_t *start)
431 uint16_t format = (start[3] << 8) | start[2];
432 uint16_t timecode = (start[1] << 8) | start[0];
434 if (current_video_frame.len > 0) {
435 current_video_frame.received_timestamp = steady_clock::now();
437 // If format is 0x0800 (no signal), add a fake (empty) audio
438 // frame to get it out of the queue.
439 // TODO: Figure out if there are other formats that come with
440 // no audio, and treat them the same.
441 if (format == 0x0800) {
442 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
443 if (fake_audio_frame.data == nullptr) {
444 // Oh well, it's just a no-signal frame anyway.
445 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
446 current_video_frame.owner->release_frame(current_video_frame);
447 current_video_frame = video_frame_allocator->alloc_frame();
450 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
453 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
455 // Update the assumed frame width. We might be one frame too late on format changes,
456 // but it's much better than asking the user to choose manually.
457 VideoFormat video_format;
458 if (decode_video_format(format, &video_format)) {
459 assumed_frame_width = video_format.width;
462 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
464 // //start[7], start[6], start[5], start[4],
465 // read_current_frame, FRAME_SIZE);
467 current_video_frame = video_frame_allocator->alloc_frame();
468 //if (current_video_frame.data == nullptr) {
469 // read_current_frame = -1;
471 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): reads the timecode (bytes 0-1) and
// format (bytes 2-3), both little-endian, from the bytes following an audio
// sync marker. Any audio collected so far is timestamped and queued, then a
// new current_audio_frame is allocated.
475 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
477 uint16_t format = (start[3] << 8) | start[2];
478 uint16_t timecode = (start[1] << 8) | start[0];
479 if (current_audio_frame.len > 0) {
480 current_audio_frame.received_timestamp = steady_clock::now();
481 //dump_audio_block();
482 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
484 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
485 // format, timecode);
486 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the pack's actual_length received bytes, read from
// xfr->buffer starting at `offset`, as two-digit hex.
// NOTE(review): the separators printed between bytes are on lines missing
// from this listing; only the (j % 8) == 7 test for one of them is visible.
490 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
492 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
493 for (unsigned j = 0; j < pack->actual_length; j++) {
494 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
495 printf("%02x", xfr->buffer[j + offset]);
498 else if ((j % 8) == 7)
// Walks n bytes of src two at a time, with one write cursor for each of the
// two destinations.
// NOTE(review): the loop body is missing from this listing; presumably it
// sends alternating source bytes to dest1 and dest2. Confirm against the
// full file, including how an odd n is handled.
506 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
509 uint8_t *dptr1 = dest1;
510 uint8_t *dptr2 = dest2;
512 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes in [start, end) to *current_frame. If they do not fit in
// the remaining capacity, the excess is recorded in current_frame->overflow,
// len is pinned to size, and an overflow above 1 MiB is logged and reset.
// Interleaved frames are split between data and data2; other frames get a
// plain memcpy at the current length. frame_type_name only appears in the log
// message.
// NOTE(review): the rest of the initial guard condition, the returns, and the
// odd-byte handling for interleaved frames are on lines missing from this
// listing.
518 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
520 if (current_frame->data == nullptr ||
521 current_frame->len > current_frame->size ||
526 int bytes = end - start;
527 if (current_frame->len + bytes > current_frame->size) {
528 current_frame->overflow = current_frame->len + bytes - current_frame->size;
529 current_frame->len = current_frame->size;
530 if (current_frame->overflow > 1048576) {
531 printf("%d bytes overflow after last %s frame\n",
532 int(current_frame->overflow), frame_type_name);
533 current_frame->overflow = 0;
537 if (current_frame->interleaved) {
// Each destination plane holds half of the bytes written so far.
538 uint8_t *data = current_frame->data + current_frame->len / 2;
539 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
540 if (current_frame->len % 2 == 1) {
544 if (bytes % 2 == 1) {
547 ++current_frame->len;
550 memcpy_interleaved(data, data2, start, bytes);
551 current_frame->len += bytes;
553 memcpy(current_frame->data + current_frame->len, start, bytes);
554 current_frame->len += bytes;
// Debug helper: prints `name` left-justified in a 10-character column,
// followed by each of the 32 bytes of n as two-digit hex, lowest index first.
560 void avx2_dump(const char *name, __m256i n)
562 printf("%-10s:", name);
563 printf(" %02x", _mm256_extract_epi8(n, 0));
564 printf(" %02x", _mm256_extract_epi8(n, 1));
565 printf(" %02x", _mm256_extract_epi8(n, 2));
566 printf(" %02x", _mm256_extract_epi8(n, 3));
567 printf(" %02x", _mm256_extract_epi8(n, 4));
568 printf(" %02x", _mm256_extract_epi8(n, 5));
569 printf(" %02x", _mm256_extract_epi8(n, 6));
570 printf(" %02x", _mm256_extract_epi8(n, 7));
572 printf(" %02x", _mm256_extract_epi8(n, 8));
573 printf(" %02x", _mm256_extract_epi8(n, 9));
574 printf(" %02x", _mm256_extract_epi8(n, 10));
575 printf(" %02x", _mm256_extract_epi8(n, 11));
576 printf(" %02x", _mm256_extract_epi8(n, 12));
577 printf(" %02x", _mm256_extract_epi8(n, 13));
578 printf(" %02x", _mm256_extract_epi8(n, 14));
579 printf(" %02x", _mm256_extract_epi8(n, 15));
581 printf(" %02x", _mm256_extract_epi8(n, 16));
582 printf(" %02x", _mm256_extract_epi8(n, 17));
583 printf(" %02x", _mm256_extract_epi8(n, 18));
584 printf(" %02x", _mm256_extract_epi8(n, 19));
585 printf(" %02x", _mm256_extract_epi8(n, 20));
586 printf(" %02x", _mm256_extract_epi8(n, 21));
587 printf(" %02x", _mm256_extract_epi8(n, 22));
588 printf(" %02x", _mm256_extract_epi8(n, 23));
590 printf(" %02x", _mm256_extract_epi8(n, 24));
591 printf(" %02x", _mm256_extract_epi8(n, 25));
592 printf(" %02x", _mm256_extract_epi8(n, 26));
593 printf(" %02x", _mm256_extract_epi8(n, 27));
594 printf(" %02x", _mm256_extract_epi8(n, 28));
595 printf(" %02x", _mm256_extract_epi8(n, 29));
596 printf(" %02x", _mm256_extract_epi8(n, 30));
597 printf(" %02x", _mm256_extract_epi8(n, 31));
602 #ifndef HAS_MULTIVERSIONING
// Variant compiled when HAS_MULTIVERSIONING is not defined.
// NOTE(review): the body is missing from this listing; presumably it consumes
// nothing and leaves all bytes to the slow path. Confirm in the full file.
604 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
606 // No fast path possible unless we have multiversioning.
610 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two target-specific copy-and-scan cores (one
// for SSE 4.1, one for AVX2). Both are defined further down and share the
// same signature.
612 __attribute__((target("sse4.1")))
613 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
615 __attribute__((target("avx2")))
616 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
618 // Does a memcpy and memchr in one to reduce processing time.
619 // Note that the benefit is somewhat limited if your L3 cache is small,
620 // as you'll (unfortunately) spend most of the time loading the data
623 // Complicated cases are left to the slow path; it basically stops copying
624 // up until the first instance of "sync_char" (usually a bit before, actually).
625 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
626 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// This is the "default" target version, used when none of the more specific
// target versions applies.
// NOTE(review): its body is missing from this listing.
627 __attribute__((target("default")))
628 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
630 // No fast path possible unless we have SSE 4.1 or higher.
// SSE 4.1 / AVX2 version of the fast path. It prepares the input for the
// vectorized core: `limit` is clamped to the frame's remaining capacity and
// rounded down to a 32-byte boundary, the unaligned head of up to 31 bytes is
// copied through add_to_frame() (stopping at sync_char if it occurs there),
// and for interleaved frames the remaining length is made a multiple of 64
// before add_to_frame_fastpath_core() is called.
// NOTE(review): the early-return statements and what happens once a sync
// byte is found in the head are on lines missing from this listing.
634 __attribute__((target("sse4.1", "avx2")))
635 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
637 if (current_frame->data == nullptr ||
638 current_frame->len > current_frame->size ||
642 size_t orig_bytes = limit - start;
643 if (orig_bytes < 128) {
648 // Don't read more bytes than we can write.
649 limit = min(limit, start + (current_frame->size - current_frame->len));
651 // Align end to 32 bytes.
652 limit = (const uint8_t *)(intptr_t(limit) & ~31);
654 if (start >= limit) {
658 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
659 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
660 if (aligned_start != start) {
661 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
662 if (sync_start == nullptr) {
663 add_to_frame(current_frame, "", start, aligned_start);
665 add_to_frame(current_frame, "", start, sync_start);
670 // Make the length a multiple of 64.
671 if (current_frame->interleaved) {
672 if (((limit - aligned_start) % 64) != 0) {
675 assert(((limit - aligned_start) % 64) == 0);
678 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 core of the fast path: copies 32-byte vectors from aligned_start
// towards limit into the frame while comparing every byte against sync_char.
// Each vector is stored before the comparison is tested. Interleaved frames
// consume two vectors per iteration and split even/odd bytes between data and
// data2; other frames copy one vector at a time into data. Returns the input
// position reached.
// NOTE(review): what happens when a match is found, and the pointer
// increments, are on lines missing from this listing.
681 __attribute__((target("avx2")))
682 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
684 const __m256i needle = _mm256_set1_epi8(sync_char);
686 const __restrict __m256i *in = (const __m256i *)aligned_start;
687 if (current_frame->interleaved) {
688 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
689 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
690 if (current_frame->len % 2 == 1) {
694 __m256i shuffle_cw = _mm256_set_epi8(
695 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
696 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
697 while (in < (const __m256i *)limit) {
698 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
699 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
700 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
702 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
703 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
704 __m256i found = _mm256_or_si256(found1, found2);
706 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
707 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
709 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
710 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
712 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
713 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
715 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
716 _mm256_storeu_si256(out2, hi);
718 if (!_mm256_testz_si256(found, found)) {
// Account for every input byte consumed by the interleaved loop.
726 current_frame->len += (uint8_t *)in - aligned_start;
728 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
729 while (in < (const __m256i *)limit) {
730 __m256i data = _mm256_load_si256(in);
731 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
732 __m256i found = _mm256_cmpeq_epi8(data, needle);
733 if (!_mm256_testz_si256(found, found)) {
// The new length is wherever the output cursor ended up.
740 current_frame->len = (uint8_t *)out - current_frame->data;
743 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
744 return (const uint8_t *)in;
// SSE 4.1 core of the fast path: the same copy-and-scan as the AVX2 version,
// on 16-byte vectors. For interleaved frames, two vectors are loaded per
// iteration; the low byte of each 16-bit pair is packed into data and the
// high byte into data2. Other frames copy one vector at a time. Returns the
// input position reached.
// NOTE(review): what happens when a match is found, and the pointer
// increments, are on lines missing from this listing.
747 __attribute__((target("sse4.1")))
748 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
750 const __m128i needle = _mm_set1_epi8(sync_char);
752 const __m128i *in = (const __m128i *)aligned_start;
753 if (current_frame->interleaved) {
754 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
755 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
756 if (current_frame->len % 2 == 1) {
760 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
761 while (in < (const __m128i *)limit) {
762 __m128i data1 = _mm_load_si128(in);
763 __m128i data2 = _mm_load_si128(in + 1);
764 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
765 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
766 __m128i data1_hi = _mm_srli_epi16(data1, 8);
767 __m128i data2_hi = _mm_srli_epi16(data2, 8);
768 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
769 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
770 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
771 _mm_storeu_si128(out2, hi);
772 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
773 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
774 if (!_mm_testz_si128(found1, found1) ||
775 !_mm_testz_si128(found2, found2)) {
// Account for every input byte consumed by the interleaved loop.
783 current_frame->len += (uint8_t *)in - aligned_start;
785 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
786 while (in < (const __m128i *)limit) {
787 __m128i data = _mm_load_si128(in);
788 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
789 __m128i found = _mm_cmpeq_epi8(data, needle);
790 if (!_mm_testz_si128(found, found)) {
// The new length is wherever the output cursor ended up.
797 current_frame->len = (uint8_t *)out - current_frame->data;
800 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
801 return (const uint8_t *)in;
804 #endif // defined(HAS_MULTIVERSIONING)
// Feeds the payload of every isochronous packet in a completed transfer into
// *current_frame, splitting the stream at each occurrence of sync_pattern.
// Within one packet it first lets add_to_frame_fastpath() consume what it
// can, then searches the remainder with memmem(). Bytes before a sync pattern
// are appended to the current frame; start_callback is then invoked with the
// position just past the pattern. Packets whose status is not COMPLETED are
// reported on stderr. frame_type_name is passed through to add_to_frame() for
// its log messages.
// NOTE(review): the sync_length parameter and the `offset` declaration are on
// lines missing from this listing.
806 void decode_packs(const libusb_transfer *xfr,
807 const char *sync_pattern,
809 FrameAllocator::Frame *current_frame,
810 const char *frame_type_name,
811 function<void(const uint8_t *start)> start_callback)
814 for (int i = 0; i < xfr->num_iso_packets; i++) {
815 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
817 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
818 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
823 const uint8_t *start = xfr->buffer + offset;
824 const uint8_t *limit = start + pack->actual_length;
825 while (start < limit) { // Usually runs only one iteration.
826 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
827 if (start == limit) break;
828 assert(start < limit);
830 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
831 if (start_next_frame == nullptr) {
832 // add the rest of the buffer
833 add_to_frame(current_frame, frame_type_name, start, limit);
836 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
837 start = start_next_frame + sync_length; // skip sync
838 start_callback(start);
842 dump_pack(xfr, offset, pack);
// Advance by the packet's allotted length (pack->length), not by
// actual_length, to reach the next packet's data in the buffer.
844 offset += pack->length;
// libusb completion callback for every transfer this driver submits.
// xfr->user_data carries the owning BMUSBCapture. A status other than
// COMPLETED or NO_DEVICE is reported and the transfer freed. NO_DEVICE marks
// the capture as disconnected (calling card_disconnected_callback once) and
// does not reschedule. Isochronous transfers are decoded as audio on endpoint
// 0x84, and otherwise as video, after which the transfer is resized for the
// current assumed width. Control transfers store four bytes into
// register_file and set up a read of the next register. The transfer is then
// resubmitted.
// NOTE(review): several printf lines below use a `setup` variable whose only
// visible declaration is commented out, so they are presumably inside a
// disabled region whose guards are missing from this listing.
848 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
850 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
851 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
852 fprintf(stderr, "error: transfer status %d\n", xfr->status);
853 libusb_free_transfer(xfr);
857 assert(xfr->user_data != nullptr);
858 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
860 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
861 if (!usb->disconnected) {
862 fprintf(stderr, "Device went away, stopping transfers.\n");
863 usb->disconnected = true;
864 if (usb->card_disconnected_callback) {
865 usb->card_disconnected_callback();
868 // Don't reschedule the transfer; the loop will stop by itself.
872 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
873 if (xfr->endpoint == 0x84) {
874 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
876 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
878 // Update the transfer with the new assumed width, if we're in the process of changing formats.
879 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
882 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
883 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
884 uint8_t *buf = libusb_control_transfer_get_data(xfr);
886 if (setup->wIndex == 44) {
887 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
889 printf("read register %2d: 0x%02x%02x%02x%02x\n",
890 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and step to the next register, wrapping
// around after NUM_BMUSB_REGISTERS.
893 memcpy(usb->register_file + usb->current_register, buf, 4);
894 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
895 if (usb->current_register == 0) {
896 // read through all of them
897 printf("register dump:");
898 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
899 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
903 libusb_fill_control_setup(xfr->buffer,
904 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
905 /*index=*/usb->current_register, /*length=*/4);
910 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
911 for (i = 0; i < xfr->actual_length; i++) {
912 printf("%02x", xfr->buffer[i]);
922 int rc = libusb_submit_transfer(xfr);
924 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. If a card_connected_callback is registered and the
// device is a Blackmagic product with id 0xbd3b or 0xbd4f, the device is
// handed to that callback, which takes over the reference. In the other
// visible paths the device reference is dropped with libusb_unref_device().
// NOTE(review): the return statements are on lines missing from this listing.
929 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
931 if (card_connected_callback != nullptr) {
932 libusb_device_descriptor desc;
933 if (libusb_get_device_descriptor(dev, &desc) < 0) {
934 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
935 libusb_unref_device(dev);
939 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
940 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
941 card_connected_callback(dev); // Callback takes ownership.
945 libusb_unref_device(dev);
949 void BMUSBCapture::usb_thread_func()
952 memset(¶m, 0, sizeof(param));
953 param.sched_priority = 1;
954 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
955 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
957 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
958 while (!should_quit) {
959 timeval sec { 1, 0 };
960 int rc = libusb_handle_events_timeout(nullptr, &sec);
961 if (rc != LIBUSB_SUCCESS)
// One capture card found on the USB bus.
// NOTE(review): code elsewhere in this file builds this struct from
// { product id, bus, port, device } and reads .product, .bus and .port, so
// those members are presumably declared on lines missing from this listing.
968 struct USBCardDevice {
971 libusb_device *device;
// Maps a USB product id to a human-readable product name:
// 0xbd3b is "Intensity Shuttle" and 0xbd4f is "UltraStudio SDI".
// NOTE(review): the handling of any other product id is on lines missing
// from this listing.
974 const char *get_product_name(uint16_t product)
976 if (product == 0xbd3b) {
977 return "Intensity Shuttle";
978 } else if (product == 0xbd4f) {
979 return "UltraStudio SDI";
// Formats a one-line description of a card in the form
// "USB card <id>: Bus <bus> Device <port> <product name>". Note that the
// value printed after "Device" is the `port` argument.
986 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
988 const char *product_name = get_product_name(product);
991 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
992 id, bus, port, product_name);
// Enumerates all USB devices and collects those that are Blackmagic products
// 0xbd3b or 0xbd4f, sorted by product, then bus, then port, so that card
// indices are stable. Non-matching devices are unreferenced here. The list is
// freed with unref_devices = 0, so the devices that were collected keep their
// reference for the caller.
// NOTE(review): libusb_get_device_list() reports failure as a negative libusb
// error code, so testing only for -1 may miss other errors; `< 0` looks
// safer. Confirm against the libusb documentation.
// NOTE(review): what follows each error message (and the `continue` for
// non-matching devices) is on lines missing from this listing.
996 vector<USBCardDevice> find_all_cards()
998 libusb_device **devices;
999 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
1000 if (num_devices == -1) {
1001 fprintf(stderr, "Error finding USB devices\n");
1004 vector<USBCardDevice> found_cards;
1005 for (ssize_t i = 0; i < num_devices; ++i) {
1006 libusb_device_descriptor desc;
1007 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1008 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1012 uint8_t bus = libusb_get_bus_number(devices[i]);
1013 uint8_t port = libusb_get_port_number(devices[i]);
1015 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1016 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1017 libusb_unref_device(devices[i]);
1021 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
1023 libusb_free_device_list(devices, 0);
1025 // Sort the devices to get a consistent ordering.
1026 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1027 if (a.product != b.product)
1028 return a.product < b.product;
1030 return a.bus < b.bus;
1031 return a.port < b.port;
// Opens the card at position `card_index` in the list from find_all_cards().
// Prints a description of every card found to stderr and stores the one for
// the selected card in *description. An out-of-range index and a failing
// libusb_open() are both reported on stderr.
// NOTE(review): what follows those messages, and the return statement, are
// not visible here.
1037 libusb_device_handle *open_card(int card_index, string *description)
1039 vector<USBCardDevice> found_cards = find_all_cards();
1041 for (size_t i = 0; i < found_cards.size(); ++i) {
1042 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1043 fprintf(stderr, "%s\n", tmp_description.c_str());
1044 if (i == size_t(card_index)) {
1045 *description = tmp_description;
// The size_t cast makes a negative card_index compare as a huge value, so it
// is caught by the same range check.
1049 if (size_t(card_index) >= found_cards.size()) {
1050 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1054 libusb_device_handle *devh;
1055 int rc = libusb_open(found_cards[card_index].device, &devh);
1057 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the references that find_all_cards() left on every listed device.
1061 for (size_t i = 0; i < found_cards.size(); ++i) {
1062 libusb_unref_device(found_cards[i].device);
// Opens a specific, already known device (e.g. one delivered by the hotplug
// callback) instead of picking one by position. `card_index` is only used for
// the text stored in *description. This function does not unref `dev`.
// NOTE(review): error handling after the messages, and the return statement,
// are not visible here.
1068 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1070 uint8_t bus = libusb_get_bus_number(dev);
1071 uint8_t port = libusb_get_port_number(dev);
1073 libusb_device_descriptor desc;
1074 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1075 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1079 *description = get_card_description(card_index, bus, port, desc.idProduct);
1081 libusb_device_handle *devh;
1082 int rc = libusb_open(dev, &devh);
1084 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
// Counts the supported cards currently attached. Initializes the default
// libusb context, enumerates with find_all_cards(), and releases the device
// references again since only the count is needed.
1093 unsigned BMUSBCapture::num_cards()
1095 int rc = libusb_init(nullptr);
1097 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1101 vector<USBCardDevice> found_cards = find_all_cards();
1102 unsigned ret = found_cards.size();
1103 for (size_t i = 0; i < found_cards.size(); ++i) {
1104 libusb_unref_device(found_cards[i].device);
// Stores the requested pixel format and re-sends the capture mode to the card
// through update_capture_mode().
1109 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1111 current_pixel_format = pixel_format;
1112 update_capture_mode();
// Prepares the card for capture, in this order:
//  - installs default frame allocators where none were set;
//  - starts the dequeue thread;
//  - initializes libusb and opens the card;
//  - selects configuration 1, claims interface 0 and cycles it through
//    alternate settings 1, 2, 1, then claims interface 3;
//  - sends the capture mode and a few vendor control transfers;
//  - allocates isochronous transfers for endpoints 3 and 4 into iso_xfrs.
// The isochronous transfers are submitted later by start_bm_capture().
// NOTE(review): several short lines (error branches, preprocessor guards,
// else branches) are not visible here, so the notes below describe only the
// visible statements.
1115 void BMUSBCapture::configure_card()
// Fall back to malloc-backed allocators owned by this object.
1117 if (video_frame_allocator == nullptr) {
1118 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1119 set_video_frame_allocator(owned_video_frame_allocator.get());
1121 if (audio_frame_allocator == nullptr) {
1122 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1123 set_audio_frame_allocator(owned_audio_frame_allocator.get());
1125 dequeue_thread_should_quit = false;
1126 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1129 struct libusb_transfer *xfr;
1131 rc = libusb_init(nullptr);
1133 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Without an explicit device, pick the card by index from the enumerated
// list; otherwise open the given device and drop our reference to it.
1137 if (dev == nullptr) {
1138 devh = open_card(card_index, &description);
1140 devh = open_card(card_index, dev, &description);
1141 libusb_unref_device(dev);
1144 fprintf(stderr, "Error finding USB device\n");
// NOTE(review): no libusb_free_config_descriptor() call for `config` is
// visible in this function — confirm the descriptor is released.
1148 libusb_config_descriptor *config;
1149 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1151 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Diagnostic dump of every interface, alternate setting and endpoint address.
1156 printf("%d interface\n", config->bNumInterfaces);
1157 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1158 printf(" interface %d\n", interface_number);
1159 const libusb_interface *interface = &config->interface[interface_number];
1160 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1161 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1162 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1163 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1164 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1165 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1171 rc = libusb_set_configuration(devh, /*configuration=*/1);
1173 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1177 rc = libusb_claim_interface(devh, 0);
1179 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1183 // Alternate setting 1 is output, alternate setting 2 is input.
1184 // Card is reset when switching alternates, so the driver uses
1185 // this “double switch” when it wants to reset.
1187 // There's also alternate settings 3 and 4, which seem to be
1188 // like 1 and 2 except they advertise less bandwidth needed.
1189 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1191 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1192 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1193 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1194 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1195 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1199 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1201 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1205 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1207 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1213 rc = libusb_claim_interface(devh, 3);
1215 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers, addressed by the `index`
// of vendor requests 214 (read) and 215 (write) as used further down.
1221 // 44 is some kind of timer register (first 16 bits count upwards)
1222 // 24 is some sort of watchdog?
1223 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1224 // (or will go to 0x73c60010?), also seen 0x73c60100
1225 // 12 also changes all the time, unclear why
1226 // 16 seems to be autodetected mode somehow
1227 // -- this is e00115e0 after reset?
1228 // ed0115e0 after mode change [to output?]
1229 // 2d0015e0 after more mode change [to input]
1230 // ed0115e0 after more mode change
1231 // 2d0015e0 after more mode change
1233 // 390115e0 seems to indicate we have signal
1234 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1236 // 200015e0 on startup
1237 // changes to 250115e0 when we sync to the signal
1239 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1241 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1243 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1244 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1246 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1247 // perhaps some of them are related to analog output?
1249 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1250 // but the driver sets it to 0x8036802a at some point.
1252 // all of this is on request 214/215. other requests (192, 219,
1253 // 222, 223, 224) are used for firmware upgrade. Probably best to
1254 // stay out of it unless you know what you're doing.
1258 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1261 // 0x01 - stable signal
1262 // 0x04 - deep color
1263 // 0x08 - unknown (audio??)
// devh is set by now, so this actually sends the mode to the card.
1267 update_capture_mode();
// Control transfers to issue, in order. Presumably each entry is
// { direction, request, register index, data }, judging by how the fields
// are used in the loop below — confirm against the struct definition.
1275 static const ctrl ctrls[] = {
1276 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1277 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1279 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1280 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1281 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1282 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1285 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The payload goes over the wire in network byte order.
1286 uint32_t flipped = htonl(ctrls[req].data);
1287 static uint8_t value[4];
1288 memcpy(value, &flipped, sizeof(flipped));
1289 int size = sizeof(value);
1290 //if (ctrls[req].request == 215) size = 0;
1291 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1292 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1294 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte answer from register 16: its last two bytes are printed
// as the firmware version.
1298 if (ctrls[req].index == 16 && rc == 4) {
1299 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1303 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1304 for (int i = 0; i < rc; ++i) {
1305 printf("%02x", value[i]);
// Read of the register selected by my_index via request 214, printed as hex.
1314 static int my_index = 0;
1315 static uint8_t value[4];
1316 int size = sizeof(value);
1317 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1318 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1320 fprintf(stderr, "Error on control\n");
1323 printf("rc=%d index=%d: 0x", rc, my_index);
1324 for (int i = 0; i < rc; ++i) {
1325 printf("%02x", value[i]);
1332 // set up an asynchronous transfer of the timer register
// The command buffers and completion flags are static, so they stay valid
// after this function returns while the transfers are in flight.
1333 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1334 static int completed = 0;
1336 xfr = libusb_alloc_transfer(0);
1337 libusb_fill_control_setup(cmdbuf,
1338 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1339 /*index=*/44, /*length=*/4);
1340 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Replaces the &completed user_data set by the fill call just above.
1341 xfr->user_data = this;
1342 libusb_submit_transfer(xfr);
1344 // set up an asynchronous transfer of register 24
1345 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1346 static int completed2 = 0;
1348 xfr = libusb_alloc_transfer(0);
1349 libusb_fill_control_setup(cmdbuf2,
1350 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1351 /*index=*/24, /*length=*/4);
1352 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1353 xfr->user_data = this;
1354 libusb_submit_transfer(xfr);
1357 // set up an asynchronous transfer of the register dump
1358 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1359 static int completed3 = 0;
1361 xfr = libusb_alloc_transfer(0);
1362 libusb_fill_control_setup(cmdbuf3,
1363 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1364 /*index=*/current_register, /*length=*/4);
1365 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1366 xfr->user_data = this;
// This transfer is only prepared; its submission is commented out.
1367 //libusb_submit_transfer(xfr);
1369 //audiofp = fopen("audio.raw", "wb");
1371 // set up isochronous transfers for audio and video
// Endpoints 3 and 4, six transfers each.
// NOTE(review): which endpoint carries video and which audio is decided on
// lines not visible here.
1372 for (int e = 3; e <= 4; ++e) {
1373 int num_transfers = 6;
1374 for (int i = 0; i < num_transfers; ++i) {
1376 int num_iso_pack, size;
1378 // Allocate for minimum width (because that will give us the most
1379 // number of packets, so we don't need to reallocate), but we'll
1380 // default to 720p for the first frame.
1381 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1382 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1383 buf_size = USB_VIDEO_TRANSFER_SIZE;
1387 buf_size = num_iso_pack * size;
1389 int num_bytes = num_iso_pack * size;
1390 assert(size_t(num_bytes) <= buf_size);
// Prefer DMA-capable memory from libusb where the API offers it.
1391 #if LIBUSB_API_VERSION >= 0x01000105
1392 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1394 uint8_t *buf = nullptr;
// Fallback: warn, then use an ordinary heap buffer.
1396 if (buf == nullptr) {
1397 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1398 #if LIBUSB_API_VERSION >= 0x01000105
1399 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1401 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1403 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
// NOTE(review): only num_bytes are allocated here while buf_size is passed
// to libusb_fill_iso_transfer() below; the assert above guarantees only
// num_bytes <= buf_size — confirm the buffer cannot be overrun.
1404 buf = new uint8_t[num_bytes];
1407 xfr = libusb_alloc_transfer(num_iso_pack);
1409 fprintf(stderr, "oom\n");
1413 int ep = LIBUSB_ENDPOINT_IN | e;
1414 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1415 num_iso_pack, cb_xfr, nullptr, 0);
1416 libusb_set_iso_packet_lengths(xfr, size);
1417 xfr->user_data = this;
// Re-derives the packet count and packet length for the assumed frame width
// and the current pixel format.
1420 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// Kept for start_bm_capture(), which submits everything in iso_xfrs.
1423 iso_xfrs.push_back(xfr);
// Submits every isochronous transfer that configure_card() stored in
// iso_xfrs, reporting a failed submission on stderr.
// NOTE(review): the declaration of `i` and the conditions guarding the
// release/exit calls at the end are not visible here.
1428 void BMUSBCapture::start_bm_capture()
1431 for (libusb_transfer *xfr : iso_xfrs) {
1432 int rc = libusb_submit_transfer(xfr);
1435 //printf("num_bytes=%d\n", num_bytes);
1436 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1437 xfr->endpoint, i, libusb_error_name(rc));
1444 libusb_release_interface(devh, 0);
1448 libusb_exit(nullptr);
// Stops the dequeue thread: raises its quit flag, wakes anything waiting on
// queues_not_empty so the flag can be noticed, and joins the thread.
1453 void BMUSBCapture::stop_dequeue_thread()
1455 dequeue_thread_should_quit = true;
1456 queues_not_empty.notify_all();
1457 dequeue_thread.join();
// Starts the USB event thread. If a card-connected callback is installed, a
// libusb hotplug callback (cb_hotplug) is first registered for arriving
// devices with vendor ID USB_VENDOR_BLACKMAGIC; already attached devices are
// also reported when hotplug_existing_devices is set.
1460 void BMUSBCapture::start_bm_thread()
1462 // Devices leaving are discovered by seeing the isochronous packets
1463 // coming back with errors, so only care about devices joining.
1464 if (card_connected_callback != nullptr) {
1465 if (libusb_hotplug_register_callback(
1466 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1467 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1468 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1469 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
// Clear the quit flag before the thread starts polling it.
1474 should_quit = false;
1475 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread().
// NOTE(review): the body is not visible here; presumably it sets should_quit
// and joins usb_thread — confirm.
1478 void BMUSBCapture::stop_bm_thread()
// Returns the selectable video modes: a single entry with ID 0, named
// "Autodetect" and flagged as autodetect.
1484 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1486 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1487 VideoMode auto_mode;
1488 auto_mode.name = "Autodetect";
1489 auto_mode.autodetect = true;
1490 return {{ 0, auto_mode }};
// Always reports mode ID 0, the only mode this class offers.
1493 uint32_t BMUSBCapture::get_current_video_mode() const
1495 return 0; // Matches get_available_video_modes().
// Accepts only mode ID 0 (checked with an assert); no other visible effect.
1498 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1500 assert(video_mode_id == 0); // Matches get_available_video_modes().
// Lists the selectable video inputs. The keys are the values that
// set_video_input() accepts and that update_capture_mode() ORs into the mode
// word sent to the card.
1503 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1506 { 0x00000000, "HDMI/SDI" },
1507 { 0x02000000, "Component" },
1508 { 0x04000000, "Composite" },
1509 { 0x06000000, "S-video" }
// Selects the video input and re-sends the capture mode to the card.
// The assert rejects any ID with bits set outside the mask 0x06000000.
1513 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1515 assert((video_input_id & ~0x06000000) == 0);
1516 current_video_input = video_input_id;
1517 update_capture_mode();
// Lists the selectable audio inputs. The keys are the values that
// set_audio_input() accepts and that update_capture_mode() ORs into the mode
// word sent to the card.
1520 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1523 { 0x00000000, "Embedded" },
1524 { 0x10000000, "Analog" }
// Selects the audio input and re-sends the capture mode to the card.
// The assert rejects any ID with bits set outside the mask 0x10000000.
1528 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1530 assert((audio_input_id & ~0x10000000) == 0);
1531 current_audio_input = audio_input_id;
1532 update_capture_mode();
// Sends the current capture mode to the card as a four-byte vendor control
// write (request 215, index 0). The mode word is 0x09000000 ORed with the
// selected video and audio input values, plus 0x20000000 for 8-bit capture.
// NOTE(review): the body of the devh == nullptr check is not visible here;
// presumably it returns early because no card is open yet — confirm.
1535 void BMUSBCapture::update_capture_mode()
1537 if (devh == nullptr) {
1541 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
// `mode` is held in network byte order, so every bit ORed in below is run
// through htonl() as well.
1542 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1543 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1544 mode |= htonl(0x20000000);
// The only other pixel format handled is 10-bit, which sets no extra bit.
1546 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1549 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1550 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1552 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1557 } // namespace bmusb