1 // Intensity Shuttle USB3 capture driver, v0.6.0
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
// USB vendor ID compared against desc.idVendor when looking for supported cards.
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
// Byte offset of the payload within a video frame buffer (passed as the video
// offset to frame_callback and skipped by dump_frame).
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
// Byte offset of the payload within an audio block buffer.
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Static hotplug state; card_connected_callback is what cb_hotplug() invokes
// when a matching device is reported.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
// Checked by usb_thread_func() on every iteration of its event loop.
64 atomic<bool> should_quit;
// Returns the number of bytes in one line of v210-packed video of the given
// width: pixels are counted in groups of six (rounding up), and every group
// takes four 32-bit words.
int v210_stride(int width)
{
	const int pixel_groups = (width + 5) / 6;
	const size_t bytes_per_group = 4 * sizeof(uint32_t);
	return pixel_groups * bytes_per_group;
}
// Computes the isochronous packet size to use for a given pixel format and
// frame width. The per-line stride is v210_stride(width) for
// PixelFormat_10BitYCbCr and two bytes per pixel otherwise; the size starts out
// as six such lines and is then checked against a 1024-byte multiple.
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
// Only sizes that are not already a multiple of 1024 need adjusting.
84 if (size % 1024 != 0) {
// Re-partitions an isochronous transfer so that its packets have the size
// find_xfer_size_for_width() picks for the given format and width. The
// transfer's total length is kept; only the packet count and the per-packet
// length are updated, and only when either differs from the current setup.
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
// Integer division: any remainder of xfr->length is left unused.
95 int num_iso_pack = xfr->length / size;
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the lookup table used by decode_video_format(): the normalized
// format word to match, plus the values copied into the decoded VideoFormat
// when it matches.
103 struct VideoFormatEntry {
104 uint16_t normalized_video_format;
105 unsigned width, height, second_field_start;
106 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a fraction (nominator / denominator).
107 unsigned frame_rate_nom, frame_rate_den;
// Fills *decoded_video_format from the 16-bit format word reported by the card.
// The visible branches handle, in order: 0x0800 (no signal), words that do not
// have all of the 0xe000 bits set, NTSC, PAL, a lookup table of known modes,
// and finally an "assume 720p60" fallback for unknown words.
111 // Get details for the given video format; returns false if detection was incomplete.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
// Everything from here on is treated as a frame with a real signal.
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
// Bit 0x0800 selects the two-bytes-per-pixel stride; otherwise v210 packing is used.
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: normalized format, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
203 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
204 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
205 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
206 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
207 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
208 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
209 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
210 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
211 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
212 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
213 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
214 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
// Linear search of the table; on a hit, copy the entry's fields and derive the
// stride from the 0x0800 bit of the original (non-normalized) format word.
216 for (const VideoFormatEntry &entry : entries) {
217 if (normalized_video_format == entry.normalized_video_format) {
218 decoded_video_format->width = entry.width;
219 decoded_video_format->height = entry.height;
220 if (video_format & 0x0800) {
221 decoded_video_format->stride = entry.width * 2;
223 decoded_video_format->stride = v210_stride(entry.width);
225 decoded_video_format->second_field_start = entry.second_field_start;
226 decoded_video_format->extra_lines_top = entry.extra_lines_top;
227 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
228 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
229 decoded_video_format->frame_rate_den = entry.frame_rate_den;
230 decoded_video_format->interlaced = entry.interlaced;
// NOTE(review): this fallback always uses the two-bytes-per-pixel stride, even
// when bit 0x0800 is clear and the branches above would have used
// v210_stride() -- confirm that this is intended.
235 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
236 decoded_video_format->width = 1280;
237 decoded_video_format->height = 720;
238 decoded_video_format->stride = 1280 * 2;
239 decoded_video_format->frame_rate_nom = 60;
240 decoded_video_format->frame_rate_den = 1;
244 // There are seemingly no direct indicators of sample rate; you just get
245 // one frame's worth and have to guess from that.
246 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
248 size_t num_samples = len / 3 / 8;
249 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
251 // See if we match or are very close to any of the mandatory HDMI sample rates.
252 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
253 for (int rate : candidate_sample_rates) {
254 if (abs(int(num_samples_per_second) - rate) < 50) {
259 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
260 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
// Out-of-line definition of the FrameAllocator destructor; it has nothing to do.
266 FrameAllocator::~FrameAllocator() {}
// Pre-allocates num_queued_frames buffers of frame_size bytes each and puts
// them on the freelist, so that alloc_frame() can hand out buffers without
// allocating.
268 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
269 : frame_size(frame_size)
271 for (size_t i = 0; i < num_queued_frames; ++i) {
272 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one pre-allocated buffer from the freelist (under freelist_mutex).
// If the freelist is empty, a "Frame overrun" message is printed instead of
// taking a buffer.
276 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
281 unique_lock<mutex> lock(freelist_mutex); // Meh.
282 if (freelist.empty()) {
// NOTE(review): frame_size is a size_t but is printed with %ld; %zu would be
// the matching conversion.
283 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// Ownership of the buffer moves from the unique_ptr on the freelist to vf.data.
286 vf.data = freelist.top().release();
287 vf.size = frame_size;
288 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist (under freelist_mutex), after
// reporting any overflow that was recorded on the frame.
293 void MallocFrameAllocator::release_frame(Frame frame)
295 if (frame.overflow > 0) {
296 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
298 unique_lock<mutex> lock(freelist_mutex);
// Re-wrap the raw pointer so the freelist owns the buffer again.
299 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Compares two 16-bit counters that are allowed to wrap around: a counts as
// "less than" b when b is between 1 and 0x7fff steps ahead of a, modulo 65536.
// Equal values are never less than each other.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Distance from a forward to b, reduced modulo 2^16.
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
// Appends a finished frame to the queue *q (under queue_lock) and wakes up the
// dequeue thread. A frame whose timecode is not after that of the last queued
// frame (with 16-bit wraparound) is released back to its allocator instead.
314 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
316 unique_lock<mutex> lock(queue_lock);
317 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
318 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
319 q->back().timecode, timecode);
320 frame.owner->release_frame(frame);
326 qf.timecode = timecode;
328 q->push_back(move(qf));
329 queues_not_empty.notify_one(); // might be spurious
332 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
334 FILE *fp = fopen(filename, "wb");
335 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
336 printf("short write!\n");
// Debugging aid: appends the payload of one audio block (everything after the
// first AUDIO_HEADER_SIZE bytes) to the already-open stream audiofp.
// NOTE(review): audio_len is assumed to be at least AUDIO_HEADER_SIZE; the
// subtraction would wrap around otherwise.
341 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
343 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. Waits until both a video and an audio frame are
// pending, compares the timecodes at the front of the two queues, and either
// drops the unmatched video frame, delivers unmatched audio with an empty
// video frame, or delivers the matching pair through frame_callback. Runs
// until dequeue_thread_should_quit is set.
346 void BMUSBCapture::dequeue_thread_func()
348 char thread_name[16];
349 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
350 pthread_setname_np(pthread_self(), thread_name);
352 if (has_dequeue_callbacks) {
353 dequeue_init_callback();
// Sample rate reported until guess_sample_rate() has been run on real audio.
355 size_t last_sample_rate = 48000;
356 while (!dequeue_thread_should_quit) {
357 unique_lock<mutex> lock(queue_lock);
358 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
360 if (dequeue_thread_should_quit) break;
362 uint16_t video_timecode = pending_video_frames.front().timecode;
363 uint16_t audio_timecode = pending_audio_frames.front().timecode;
364 AudioFormat audio_format;
365 audio_format.bits_per_sample = 24;
366 audio_format.num_channels = 8;
367 audio_format.sample_rate = last_sample_rate;
// Case 1: the video frame is older than the oldest audio block; drop it.
368 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
369 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
371 QueuedFrame video_frame = pending_video_frames.front();
372 pending_video_frames.pop_front();
374 video_frame_allocator->release_frame(video_frame.frame);
// Case 2: the audio block is older than the oldest video frame; deliver it
// together with an empty video frame.
375 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
376 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
378 QueuedFrame audio_frame = pending_audio_frames.front();
379 pending_audio_frames.pop_front();
381 audio_format.id = audio_frame.format;
383 // Use the video format of the pending frame.
// The pending video frame is only inspected here; it stays in the queue.
384 QueuedFrame video_frame = pending_video_frames.front();
385 VideoFormat video_format;
386 decode_video_format(video_frame.format, &video_format);
388 frame_callback(audio_timecode,
389 FrameAllocator::Frame(), 0, video_format,
390 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Case 3: timecodes match; take both frames off their queues.
392 QueuedFrame video_frame = pending_video_frames.front();
393 QueuedFrame audio_frame = pending_audio_frames.front();
394 pending_audio_frames.pop_front();
395 pending_video_frames.pop_front();
// NOTE(review): the dump calls below look like debugging code; whatever guard
// surrounds them is not visible in this excerpt.
400 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
401 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
402 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
405 VideoFormat video_format;
406 audio_format.id = audio_frame.format;
// Only re-estimate the sample rate when the format decoded fully and there
// actually is audio data to measure.
407 if (decode_video_format(video_frame.format, &video_format)) {
408 if (audio_frame.frame.len != 0) {
409 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
410 last_sample_rate = audio_format.sample_rate;
412 frame_callback(video_timecode,
413 video_frame.frame, HEADER_SIZE, video_format,
414 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Incomplete format detection: deliver the audio with an empty video frame.
416 audio_format.sample_rate = last_sample_rate;
417 frame_callback(video_timecode,
418 FrameAllocator::Frame(), 0, video_format,
419 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
423 if (has_dequeue_callbacks) {
424 dequeue_cleanup_callback();
// Called when a new video frame starts in the stream. start points at four
// bytes read as two little-endian 16-bit words: timecode, then format. The
// frame collected so far (if it holds any data) is queued under those values,
// the assumed frame width is updated, and a fresh frame is allocated.
428 void BMUSBCapture::start_new_frame(const uint8_t *start)
430 uint16_t format = (start[3] << 8) | start[2];
431 uint16_t timecode = (start[1] << 8) | start[0];
433 if (current_video_frame.len > 0) {
434 current_video_frame.received_timestamp = steady_clock::now();
436 // If format is 0x0800 (no signal), add a fake (empty) audio
437 // frame to get it out of the queue.
438 // TODO: Figure out if there are other formats that come with
439 // no audio, and treat them the same.
440 if (format == 0x0800) {
441 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
442 if (fake_audio_frame.data == nullptr) {
443 // Oh well, it's just a no-signal frame anyway.
444 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
445 current_video_frame.owner->release_frame(current_video_frame);
446 current_video_frame = video_frame_allocator->alloc_frame();
449 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
452 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
454 // Update the assumed frame width. We might be one frame too late on format changes,
455 // but it's much better than asking the user to choose manually.
456 VideoFormat video_format;
457 if (decode_video_format(format, &video_format)) {
458 assumed_frame_width = video_format.width;
461 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
463 // //start[7], start[6], start[5], start[4],
464 // read_current_frame, FRAME_SIZE);
// Start collecting the next frame into a new buffer.
466 current_video_frame = video_frame_allocator->alloc_frame();
467 //if (current_video_frame.data == nullptr) {
468 // read_current_frame = -1;
470 // read_current_frame = 0;
// Audio counterpart of start_new_frame(): reads timecode and format from the
// four bytes at start (little-endian 16-bit words), queues the audio block
// collected so far if it holds any data, and allocates a fresh one.
474 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
476 uint16_t format = (start[3] << 8) | start[2];
477 uint16_t timecode = (start[1] << 8) | start[0];
478 if (current_audio_frame.len > 0) {
479 current_audio_frame.received_timestamp = steady_clock::now();
480 //dump_audio_block();
481 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
483 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
484 // format, timecode);
485 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debugging aid: prints the bytes actually received in one isochronous packet
// as hex, reading them from xfr->buffer starting at the given offset.
489 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
491 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
492 for (unsigned j = 0; j < pack->actual_length; j++) {
493 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
494 printf("%02x", xfr->buffer[j + offset]);
// Separator handling keyed on the byte's position within a group of eight.
497 else if ((j % 8) == 7)
// Copies n bytes from src into two destination buffers, consuming the source
// two bytes per loop iteration.
// NOTE(review): presumably even-indexed bytes go to dest1 and odd-indexed
// bytes to dest2; the loop body is not visible in this excerpt -- confirm.
505 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
508 uint8_t *dptr1 = dest1;
509 uint8_t *dptr2 = dest2;
511 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to current_frame. Interleaved frames are
// written through memcpy_interleaved() into data and data2; other frames get
// a plain memcpy() into data. If the bytes do not fit, the frame is marked
// full and the excess is recorded in current_frame->overflow.
517 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard against frames that cannot accept data.
519 if (current_frame->data == nullptr ||
520 current_frame->len > current_frame->size ||
525 int bytes = end - start;
526 if (current_frame->len + bytes > current_frame->size) {
527 current_frame->overflow = current_frame->len + bytes - current_frame->size;
528 current_frame->len = current_frame->size;
// Only overflows above 1 MB are reported right away; the counter is reset afterwards.
529 if (current_frame->overflow > 1048576) {
530 printf("%d bytes overflow after last %s frame\n",
531 int(current_frame->overflow), frame_type_name);
532 current_frame->overflow = 0;
536 if (current_frame->interleaved) {
// Each of the two planes holds half of the bytes written so far.
537 uint8_t *data = current_frame->data + current_frame->len / 2;
538 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length and odd byte counts get special handling before the bulk copy.
539 if (current_frame->len % 2 == 1) {
543 if (bytes % 2 == 1) {
546 ++current_frame->len;
549 memcpy_interleaved(data, data2, start, bytes);
550 current_frame->len += bytes;
552 memcpy(current_frame->data + current_frame->len, start, bytes);
553 current_frame->len += bytes;
// Debugging aid: prints a label followed by all 32 bytes of an AVX2 register
// as two-digit hex values, lowest byte first. Each byte is extracted with a
// separate _mm256_extract_epi8() call using a literal index.
559 void avx2_dump(const char *name, __m256i n)
561 printf("%-10s:", name);
562 printf(" %02x", _mm256_extract_epi8(n, 0));
563 printf(" %02x", _mm256_extract_epi8(n, 1));
564 printf(" %02x", _mm256_extract_epi8(n, 2));
565 printf(" %02x", _mm256_extract_epi8(n, 3));
566 printf(" %02x", _mm256_extract_epi8(n, 4));
567 printf(" %02x", _mm256_extract_epi8(n, 5));
568 printf(" %02x", _mm256_extract_epi8(n, 6));
569 printf(" %02x", _mm256_extract_epi8(n, 7));
571 printf(" %02x", _mm256_extract_epi8(n, 8));
572 printf(" %02x", _mm256_extract_epi8(n, 9));
573 printf(" %02x", _mm256_extract_epi8(n, 10));
574 printf(" %02x", _mm256_extract_epi8(n, 11));
575 printf(" %02x", _mm256_extract_epi8(n, 12));
576 printf(" %02x", _mm256_extract_epi8(n, 13));
577 printf(" %02x", _mm256_extract_epi8(n, 14));
578 printf(" %02x", _mm256_extract_epi8(n, 15));
580 printf(" %02x", _mm256_extract_epi8(n, 16));
581 printf(" %02x", _mm256_extract_epi8(n, 17));
582 printf(" %02x", _mm256_extract_epi8(n, 18));
583 printf(" %02x", _mm256_extract_epi8(n, 19));
584 printf(" %02x", _mm256_extract_epi8(n, 20));
585 printf(" %02x", _mm256_extract_epi8(n, 21));
586 printf(" %02x", _mm256_extract_epi8(n, 22));
587 printf(" %02x", _mm256_extract_epi8(n, 23));
589 printf(" %02x", _mm256_extract_epi8(n, 24));
590 printf(" %02x", _mm256_extract_epi8(n, 25));
591 printf(" %02x", _mm256_extract_epi8(n, 26));
592 printf(" %02x", _mm256_extract_epi8(n, 27));
593 printf(" %02x", _mm256_extract_epi8(n, 28));
594 printf(" %02x", _mm256_extract_epi8(n, 29));
595 printf(" %02x", _mm256_extract_epi8(n, 30));
596 printf(" %02x", _mm256_extract_epi8(n, 31));
601 #ifndef HAS_MULTIVERSIONING
// Variant compiled when HAS_MULTIVERSIONING is not defined; it has the same
// signature as the vectorized fast path so that callers need no changes.
603 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
605 // No fast path possible unless we have multiversioning.
609 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two target-specific versions of the fast-path
// core (SSE 4.1 and AVX2); both are defined further down.
611 __attribute__((target("sse4.1")))
612 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
614 __attribute__((target("avx2")))
615 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
617 // Does a memcpy and memchr in one to reduce processing time.
618 // Note that the benefit is somewhat limited if your L3 cache is small,
619 // as you'll (unfortunately) spend most of the time loading the data
622 // Complicated cases are left to the slow path; it basically stops copying
623 // up until the first instance of "sync_char" (usually a bit before, actually).
624 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
625 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// This is the version for the "default" target, i.e. the one used when
// neither of the sse4.1/avx2 versions applies.
626 __attribute__((target("default")))
627 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
629 // No fast path possible unless we have SSE 4.1 or higher.
// Fast-path front end for the sse4.1 and avx2 targets. It validates the frame,
// clamps and aligns the range to copy, pushes the unaligned head through
// add_to_frame() (stopping at the first sync_char found by memchr), and hands
// the aligned remainder to add_to_frame_fastpath_core().
633 __attribute__((target("sse4.1", "avx2")))
634 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
636 if (current_frame->data == nullptr ||
637 current_frame->len > current_frame->size ||
641 size_t orig_bytes = limit - start;
// Inputs shorter than 128 bytes are special-cased.
642 if (orig_bytes < 128) {
647 // Don't read more bytes than we can write.
648 limit = min(limit, start + (current_frame->size - current_frame->len));
650 // Align end to 32 bytes.
651 limit = (const uint8_t *)(intptr_t(limit) & ~31);
653 if (start >= limit) {
657 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
658 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
659 if (aligned_start != start) {
660 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
661 if (sync_start == nullptr) {
662 add_to_frame(current_frame, "", start, aligned_start);
// A sync byte was found in the head: copy only up to it.
664 add_to_frame(current_frame, "", start, sync_start);
669 // Make the length a multiple of 64.
670 if (current_frame->interleaved) {
671 if (((limit - aligned_start) % 64) != 0) {
674 assert(((limit - aligned_start) % 64) == 0);
677 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 version of the fast-path core. Copies 32-byte vectors from
// aligned_start towards limit into the frame while comparing every byte
// against sync_char, and returns the input position it got to. Interleaved
// frames are processed 64 bytes per iteration and split into the data and
// data2 planes; other frames are copied straight into data.
680 __attribute__((target("avx2")))
681 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
683 const __m256i needle = _mm256_set1_epi8(sync_char);
685 const __restrict __m256i *in = (const __m256i *)aligned_start;
686 if (current_frame->interleaved) {
687 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
688 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
689 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane: even-indexed bytes to the low half, odd-indexed to the high half.
693 __m256i shuffle_cw = _mm256_set_epi8(
694 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
695 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
696 while (in < (const __m256i *)limit) {
697 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
698 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
699 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
701 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
702 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
703 __m256i found = _mm256_or_si256(found1, found2);
705 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
706 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
708 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
709 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
711 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
712 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
714 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
715 _mm256_storeu_si256(out2, hi);
// Non-zero compare result: at least one byte in this 64-byte chunk equals sync_char.
717 if (!_mm256_testz_si256(found, found)) {
725 current_frame->len += (uint8_t *)in - aligned_start;
727 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
728 while (in < (const __m256i *)limit) {
729 __m256i data = _mm256_load_si256(in);
730 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
731 __m256i found = _mm256_cmpeq_epi8(data, needle);
732 if (!_mm256_testz_si256(found, found)) {
739 current_frame->len = (uint8_t *)out - current_frame->data;
742 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
743 return (const uint8_t *)in;
// SSE 4.1 version of the fast-path core; same contract as the AVX2 version but
// working on 16-byte vectors. For interleaved frames, each pair of input
// vectors is split into its low and high bytes with a mask and a shift, and
// the results are packed into the data and data2 planes respectively.
746 __attribute__((target("sse4.1")))
747 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
749 const __m128i needle = _mm_set1_epi8(sync_char);
751 const __m128i *in = (const __m128i *)aligned_start;
752 if (current_frame->interleaved) {
753 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
754 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
755 if (current_frame->len % 2 == 1) {
759 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
760 while (in < (const __m128i *)limit) {
761 __m128i data1 = _mm_load_si128(in);
762 __m128i data2 = _mm_load_si128(in + 1);
// Low byte of every 16-bit word via mask, high byte via shift.
763 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
764 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
765 __m128i data1_hi = _mm_srli_epi16(data1, 8);
766 __m128i data2_hi = _mm_srli_epi16(data2, 8);
767 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
768 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
769 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
770 _mm_storeu_si128(out2, hi);
771 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
772 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
// Either input vector containing sync_char triggers the branch below.
773 if (!_mm_testz_si128(found1, found1) ||
774 !_mm_testz_si128(found2, found2)) {
782 current_frame->len += (uint8_t *)in - aligned_start;
784 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
785 while (in < (const __m128i *)limit) {
786 __m128i data = _mm_load_si128(in);
787 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
788 __m128i found = _mm_cmpeq_epi8(data, needle);
789 if (!_mm_testz_si128(found, found)) {
796 current_frame->len = (uint8_t *)out - current_frame->data;
799 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
800 return (const uint8_t *)in;
803 #endif // defined(HAS_MULTIVERSIONING)
// Walks all isochronous packets of a completed transfer and appends their
// payload to current_frame. Each packet is first run through
// add_to_frame_fastpath(); whatever is left is searched for sync_pattern with
// memmem(). When the pattern is found, the bytes before it are appended and
// start_callback is invoked with the position right after the pattern.
805 void decode_packs(const libusb_transfer *xfr,
806 const char *sync_pattern,
808 FrameAllocator::Frame *current_frame,
809 const char *frame_type_name,
810 function<void(const uint8_t *start)> start_callback)
813 for (int i = 0; i < xfr->num_iso_packets; i++) {
814 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
816 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
817 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only the bytes actually received (actual_length) are processed.
822 const uint8_t *start = xfr->buffer + offset;
823 const uint8_t *limit = start + pack->actual_length;
824 while (start < limit) { // Usually runs only one iteration.
825 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
826 if (start == limit) break;
827 assert(start < limit);
829 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
830 if (start_next_frame == nullptr) {
831 // add the rest of the buffer
832 add_to_frame(current_frame, frame_type_name, start, limit);
835 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
836 start = start_next_frame + sync_length; // skip sync
837 start_callback(start);
841 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer at their nominal length, not their actual length.
843 offset += pack->length;
// Completion callback for transfers. Failed transfers are logged and freed.
// A vanished device marks the capture object (taken from xfr->user_data) as
// disconnected and notifies card_disconnected_callback once. Isochronous
// transfers are decoded as audio (endpoint 0x84) or video, control transfers
// feed the register dump, and the transfer is then submitted again.
847 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
849 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
850 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
851 fprintf(stderr, "error: transfer status %d\n", xfr->status);
852 libusb_free_transfer(xfr);
856 assert(xfr->user_data != nullptr);
857 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
859 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
// The disconnected flag keeps the message and callback from firing more than once.
860 if (!usb->disconnected) {
861 fprintf(stderr, "Device went away, stopping transfers.\n");
862 usb->disconnected = true;
863 if (usb->card_disconnected_callback) {
864 usb->card_disconnected_callback();
867 // Don't reschedule the transfer; the loop will stop by itself.
871 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
872 if (xfr->endpoint == 0x84) {
873 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
875 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
877 // Update the transfer with the new assumed width, if we're in the process of changing formats.
878 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
881 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
882 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
883 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is used below although its declaration is commented
// out above; presumably these lines sit in a disabled section whose guard is
// not visible in this excerpt -- confirm.
885 if (setup->wIndex == 44) {
886 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
888 printf("read register %2d: 0x%02x%02x%02x%02x\n",
889 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// around at NUM_BMUSB_REGISTERS.
892 memcpy(usb->register_file + usb->current_register, buf, 4);
893 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
894 if (usb->current_register == 0) {
895 // read through all of them
896 printf("register dump:");
897 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
898 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Prepare the same transfer for reading the next register.
902 libusb_fill_control_setup(xfr->buffer,
903 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
904 /*index=*/usb->current_register, /*length=*/4);
909 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
910 for (i = 0; i < xfr->actual_length; i++) {
911 printf("%02x", xfr->buffer[i]);
921 int rc = libusb_submit_transfer(xfr);
923 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. If a card_connected_callback is registered and the
// device's vendor/product ID is one of the two supported ones, the device is
// passed on to that callback; otherwise the device reference is dropped.
928 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
930 if (card_connected_callback != nullptr) {
931 libusb_device_descriptor desc;
932 if (libusb_get_device_descriptor(dev, &desc) < 0) {
933 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
934 libusb_unref_device(dev);
// Same product IDs as in get_product_name().
938 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
939 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
940 card_connected_callback(dev); // Callback takes ownership.
944 libusb_unref_device(dev);
948 void BMUSBCapture::usb_thread_func()
951 memset(¶m, 0, sizeof(param));
952 param.sched_priority = 1;
953 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
954 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
956 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
957 while (!should_quit) {
958 timeval sec { 1, 0 };
959 int rc = libusb_handle_events_timeout(nullptr, &sec);
960 if (rc != LIBUSB_SUCCESS)
// One supported card found on the USB bus, as collected by find_all_cards().
967 struct USBCardDevice {
// The libusb device itself; open_card() passes it to libusb_open().
970 libusb_device *device;
// Maps a USB product ID to a human-readable product name. The two IDs handled
// here are the same ones that cb_hotplug() and find_all_cards() accept.
973 const char *get_product_name(uint16_t product)
975 if (product == 0xbd3b) {
976 return "Intensity Shuttle";
977 } else if (product == 0xbd4f) {
978 return "UltraStudio SDI";
// Builds the one-line description of a card from its index, bus number, port
// number and product name.
// NOTE(review): the value printed after "Device" is the port number that the
// callers pass in, not a device address.
985 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
987 const char *product_name = get_product_name(product);
990 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
991 id, bus, port, product_name);
// Enumerates all USB devices and collects those with a supported
// vendor/product ID, sorted so that card indices stay consistent between runs.
// Devices that do not match are unreferenced here; matching ones keep their
// reference, since the list is freed without unreferencing its devices.
995 vector<USBCardDevice> find_all_cards()
997 libusb_device **devices;
998 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
// NOTE(review): libusb_get_device_list() reports failure with a negative error
// code, which need not be exactly -1; `num_devices < 0` would catch every
// failure -- confirm against the libusb documentation.
999 if (num_devices == -1) {
1000 fprintf(stderr, "Error finding USB devices\n");
1003 vector<USBCardDevice> found_cards;
1004 for (ssize_t i = 0; i < num_devices; ++i) {
1005 libusb_device_descriptor desc;
1006 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1007 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1011 uint8_t bus = libusb_get_bus_number(devices[i]);
1012 uint8_t port = libusb_get_port_number(devices[i]);
// Not one of the supported cards: drop our reference to it.
1014 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1015 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1016 libusb_unref_device(devices[i]);
1020 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free only the list, leaving the device references alone.
1022 libusb_free_device_list(devices, 0);
1024 // Sort the devices to get a consistent ordering.
1025 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1026 if (a.product != b.product)
1027 return a.product < b.product;
1029 return a.bus < b.bus;
1030 return a.port < b.port;
// Opens the card with the given index in the list returned by
// find_all_cards(). The description of every card found is printed to
// stderr, and the description of the selected card is stored in *description.
1036 libusb_device_handle *open_card(int card_index, string *description)
1038 vector<USBCardDevice> found_cards = find_all_cards();
// List all cards, remembering the description of the requested one.
1040 for (size_t i = 0; i < found_cards.size(); ++i) {
1041 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1042 fprintf(stderr, "%s\n", tmp_description.c_str());
1043 if (i == size_t(card_index)) {
1044 *description = tmp_description;
// The conversion to size_t makes a negative card_index fail this range
// check as well.
1048 if (size_t(card_index) >= found_cards.size()) {
1049 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1053 libusb_device_handle *devh;
1054 int rc = libusb_open(found_cards[card_index].device, &devh);
1056 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the device references obtained through find_all_cards(), for all
// cards, including the one that was just opened.
1060 for (size_t i = 0; i < found_cards.size(); ++i) {
1061 libusb_unref_device(found_cards[i].device);
// Opens the specific device `dev` instead of searching for a card by index.
// card_index is only used as the card number in the text that is stored
// in *description.
1067 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1069 uint8_t bus = libusb_get_bus_number(dev);
1070 uint8_t port = libusb_get_port_number(dev);
// The descriptor is needed for the product ID that goes into the description.
1072 libusb_device_descriptor desc;
1073 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1074 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1078 *description = get_card_description(card_index, bus, port, desc.idProduct);
1080 libusb_device_handle *devh;
1081 int rc = libusb_open(dev, &devh);
1083 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
// Counts the supported cards that are currently connected, by initializing
// libusb's default context and enumerating with find_all_cards().
1092 unsigned BMUSBCapture::num_cards()
1094 int rc = libusb_init(nullptr);
1096 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1100 vector<USBCardDevice> found_cards = find_all_cards();
1101 unsigned ret = found_cards.size();
// Only the count is needed, so give back the device references right away.
1102 for (size_t i = 0; i < found_cards.size(); ++i) {
1103 libusb_unref_device(found_cards[i].device);
1108 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1110 current_pixel_format = pixel_format;
1111 update_capture_mode();
// Prepares the card for capture. In order, the visible code:
//  - installs default MallocFrameAllocators for video and audio if the
//    user has not set any, and starts the dequeue thread,
//  - initializes libusb and opens the card (by index, or from `dev`),
//  - prints the interfaces/alternate settings/endpoints of configuration 0,
//  - selects configuration 1, claims interface 0, cycles its alternate
//    setting 1 -> 2 -> 1 and claims interface 3,
//  - sends the capture mode (update_capture_mode()) and a few vendor
//    control requests,
//  - allocates and fills the isochronous transfers for endpoints 3 and 4
//    and stores them in iso_xfrs (they are submitted by start_bm_capture()).
// NOTE(review): several sections below look like debugging code; confirm
// against the preprocessor conditionals which of them are actually compiled.
1114 void BMUSBCapture::configure_card()
// Fall back to our own allocators when none have been supplied.
1116 if (video_frame_allocator == nullptr) {
1117 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1118 set_video_frame_allocator(owned_video_frame_allocator.get());
1120 if (audio_frame_allocator == nullptr) {
1121 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1122 set_audio_frame_allocator(owned_audio_frame_allocator.get());
// Start the thread running dequeue_thread_func().
1124 dequeue_thread_should_quit = false;
1125 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1128 struct libusb_transfer *xfr;
1130 rc = libusb_init(nullptr);
1132 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Without an explicit device, look the card up by index; otherwise open the
// given device and release our reference to it.
1136 if (dev == nullptr) {
1137 devh = open_card(card_index, &description);
1139 devh = open_card(card_index, dev, &description);
1140 libusb_unref_device(dev);
1143 fprintf(stderr, "Error finding USB device\n");
// Dump the layout of the first configuration (informational output only).
1147 libusb_config_descriptor *config;
1148 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1150 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
1155 printf("%d interface\n", config->bNumInterfaces);
1156 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1157 printf(" interface %d\n", interface_number);
1158 const libusb_interface *interface = &config->interface[interface_number];
1159 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1160 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1161 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1162 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1163 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1164 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1170 rc = libusb_set_configuration(devh, /*configuration=*/1);
1172 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1176 rc = libusb_claim_interface(devh, 0);
1178 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1182 // Alternate setting 1 is output, alternate setting 2 is input.
1183 // Card is reset when switching alternates, so the driver uses
1184 // this “double switch” when it wants to reset.
1186 // There's also alternate settings 3 and 4, which seem to be
1187 // like 1 and 2 except they advertise less bandwidth needed.
1188 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1190 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// Give the user a hint about the most common cause of this particular error.
1191 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1192 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1193 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1194 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1198 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1200 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1204 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1206 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1212 rc = libusb_claim_interface(devh, 3);
1214 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers (accessed through the
// vendor requests 214/215 used below):
1220 // 44 is some kind of timer register (first 16 bits count upwards)
1221 // 24 is some sort of watchdog?
1222 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1223 // (or will go to 0x73c60010?), also seen 0x73c60100
1224 // 12 also changes all the time, unclear why
1225 // 16 seems to be autodetected mode somehow
1226 // -- this is e00115e0 after reset?
1227 // ed0115e0 after mode change [to output?]
1228 // 2d0015e0 after more mode change [to input]
1229 // ed0115e0 after more mode change
1230 // 2d0015e0 after more mode change
1232 // 390115e0 seems to indicate we have signal
1233 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1235 // 200015e0 on startup
1236 // changes to 250115e0 when we sync to the signal
1238 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1240 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1242 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1243 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1245 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1246 // perhaps some of them are related to analog output?
1248 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1249 // but the driver sets it to 0x8036802a at some point.
1251 // all of this is on request 214/215. other requests (192, 219,
1252 // 222, 223, 224) are used for firmware upgrade. Probably best to
1253 // stay out of it unless you know what you're doing.
1257 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1260 // 0x01 - stable signal
1261 // 0x04 - deep color
1262 // 0x08 - unknown (audio??)
// devh is set now, so this sends the mode word to the card.
1266 update_capture_mode();
// Vendor control requests to run at startup; each entry is
// { direction, request, index, data }.
1274 static const ctrl ctrls[] = {
1275 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1276 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1278 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1279 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1280 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1281 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1284 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word is converted with htonl() before it is put into the
// four-byte transfer buffer.
1285 uint32_t flipped = htonl(ctrls[req].data);
1286 static uint8_t value[4];
1287 memcpy(value, &flipped, sizeof(flipped));
1288 int size = sizeof(value);
1289 //if (ctrls[req].request == 215) size = 0;
1290 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1291 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1293 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte answer for index 16 has the firmware version in its last
// two bytes.
1297 if (ctrls[req].index == 16 && rc == 4) {
1298 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1302 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1303 for (int i = 0; i < rc; ++i) {
1304 printf("%02x", value[i]);
// Reads a single register (index my_index) with request 214 and prints it.
1313 static int my_index = 0;
1314 static uint8_t value[4];
1315 int size = sizeof(value);
1316 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1317 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1319 fprintf(stderr, "Error on control\n");
1322 printf("rc=%d index=%d: 0x", rc, my_index);
1323 for (int i = 0; i < rc; ++i) {
1324 printf("%02x", value[i]);
1331 // set up an asynchronous transfer of the timer register
1332 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1333 static int completed = 0;
1335 xfr = libusb_alloc_transfer(0);
1336 libusb_fill_control_setup(cmdbuf,
1337 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1338 /*index=*/44, /*length=*/4);
1339 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// cb_xfr receives the BMUSBCapture object through user_data; this overrides
// the &completed pointer passed to libusb_fill_control_transfer() above.
1340 xfr->user_data = this;
1341 libusb_submit_transfer(xfr);
1343 // set up an asynchronous transfer of register 24
1344 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1345 static int completed2 = 0;
1347 xfr = libusb_alloc_transfer(0);
1348 libusb_fill_control_setup(cmdbuf2,
1349 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1350 /*index=*/24, /*length=*/4);
1351 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1352 xfr->user_data = this;
1353 libusb_submit_transfer(xfr);
1356 // set up an asynchronous transfer of the register dump
1357 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1358 static int completed3 = 0;
1360 xfr = libusb_alloc_transfer(0);
1361 libusb_fill_control_setup(cmdbuf3,
1362 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1363 /*index=*/current_register, /*length=*/4);
1364 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1365 xfr->user_data = this;
// The register-dump transfer is prepared but deliberately not submitted.
1366 //libusb_submit_transfer(xfr);
1368 //audiofp = fopen("audio.raw", "wb");
1370 // set up isochronous transfers for audio and video
// Endpoints 3 and 4 each get num_transfers (six) transfers.
1371 for (int e = 3; e <= 4; ++e) {
1372 int num_transfers = 6;
1373 for (int i = 0; i < num_transfers; ++i) {
1375 int num_iso_pack, size;
1377 // Allocate for minimum width (because that will give us the most
1378 // number of packets, so we don't need to reallocate), but we'll
1379 // default to 720p for the first frame.
1380 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1381 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1382 buf_size = USB_VIDEO_TRANSFER_SIZE;
1386 buf_size = num_iso_pack * size;
// The memory actually allocated must never exceed what we tell libusb the
// buffer holds.
1388 int num_bytes = num_iso_pack * size;
1389 assert(size_t(num_bytes) <= buf_size);
// Prefer memory from libusb_dev_mem_alloc() when libusb is new enough to
// provide it; otherwise buf starts out as nullptr and the fallback below
// is used.
1390 #if LIBUSB_API_VERSION >= 0x01000105
1391 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1393 uint8_t *buf = nullptr;
1395 if (buf == nullptr) {
1396 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1397 #if LIBUSB_API_VERSION >= 0x01000105
1398 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1400 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1402 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
// Fall back to ordinary heap memory.
1403 buf = new uint8_t[num_bytes];
1406 xfr = libusb_alloc_transfer(num_iso_pack);
1408 fprintf(stderr, "oom\n");
1412 int ep = LIBUSB_ENDPOINT_IN | e;
1413 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1414 num_iso_pack, cb_xfr, nullptr, 0);
1415 libusb_set_iso_packet_lengths(xfr, size);
1416 xfr->user_data = this;
// Re-split the transfer into packets that match the frame width and pixel
// format we currently expect.
1419 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// Only stored here; the transfers are submitted in start_bm_capture().
1422 iso_xfrs.push_back(xfr);
// Starts the capture by submitting every isochronous transfer stored in
// iso_xfrs to libusb. A failing submission is reported on stderr together
// with the endpoint, a running number and the libusb error name.
1427 void BMUSBCapture::start_bm_capture()
1430 for (libusb_transfer *xfr : iso_xfrs) {
1431 int rc = libusb_submit_transfer(xfr);
1434 //printf("num_bytes=%d\n", num_bytes);
1435 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1436 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): confirm whether the cleanup calls below are compiled in;
// they would release interface 0 and shut libusb down right after the
// transfers were submitted.
1443 libusb_release_interface(devh, 0);
1447 libusb_exit(nullptr);
1452 void BMUSBCapture::stop_dequeue_thread()
1454 dequeue_thread_should_quit = true;
1455 queues_not_empty.notify_all();
1456 dequeue_thread.join();
// Starts the USB event thread (usb_thread_func()). If a card-connected
// callback has been set, cb_hotplug is first registered with libusb so that
// newly arriving Blackmagic devices get reported.
1459 void BMUSBCapture::start_bm_thread()
1461 // Devices leaving are discovered by seeing the isochronous packets
1462 // coming back with errors, so only care about devices joining.
1463 if (card_connected_callback != nullptr) {
// Match on the Blackmagic vendor ID only (any product, any device class).
// With hotplug_existing_devices set, LIBUSB_HOTPLUG_ENUMERATE is passed so
// that devices that are already plugged in are reported as well.
1464 if (libusb_hotplug_register_callback(
1465 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1466 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1467 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1468 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
// Clear the quit flag before the thread starts polling it.
1473 should_quit = false;
1474 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Presumably the counterpart of start_bm_thread(), i.e. it shuts down the
// USB event thread again. TODO confirm against the function body.
1477 void BMUSBCapture::stop_bm_thread()
1483 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1485 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1486 VideoMode auto_mode;
1487 auto_mode.name = "Autodetect";
1488 auto_mode.autodetect = true;
1489 return {{ 0, auto_mode }};
1492 uint32_t BMUSBCapture::get_current_video_mode() const
1494 return 0; // Matches get_available_video_modes().
1497 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1499 assert(video_mode_id == 0); // Matches get_available_video_modes().
1502 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1505 { 0x00000000, "HDMI/SDI" },
1506 { 0x02000000, "Component" },
1507 { 0x04000000, "Composite" },
1508 { 0x06000000, "S-video" }
1512 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1514 assert((video_input_id & ~0x06000000) == 0);
1515 current_video_input = video_input_id;
1516 update_capture_mode();
1519 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1522 { 0x00000000, "Embedded" },
1523 { 0x10000000, "Analog" }
1527 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1529 assert((audio_input_id & ~0x10000000) == 0);
1530 current_audio_input = audio_input_id;
1531 update_capture_mode();
// Sends the current capture settings to the card as a single 32-bit mode
// word in a vendor control transfer (request 215, index 0). The word is
// 0x09000000 combined with current_video_input and current_audio_input,
// plus 0x20000000 when capturing PixelFormat_8BitYCbCr; it is converted
// with htonl() before being sent.
1534 void BMUSBCapture::update_capture_mode()
// The setters may be called before a device has been opened (presumably
// nothing is sent in that case; TODO confirm).
1536 if (devh == nullptr) {
1540 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
1541 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1542 if (current_pixel_format == PixelFormat_8BitYCbCr) {
// `mode` has already been through htonl(), so the extra bit is converted
// the same way before being ORed in.
1543 mode |= htonl(0x20000000);
// The only other format that is expected here is 10-bit.
1545 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1548 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1549 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1551 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1556 } // namespace bmusb