1 // Intensity Shuttle USB3 capture driver, v0.7.0
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
// USB vendor ID that the device-matching code compares desc.idVendor against.
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
// Number of bytes skipped at the start of a video frame (used as the video
// offset in dump_frame() and in the frame_callback() calls).
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
// Number of bytes skipped at the start of an audio block (used as the audio
// offset in dump_audio_block() and in the frame_callback() calls).
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Definitions of BMUSBCapture's static members. cb_hotplug() only acts when
// card_connected_callback is non-null.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
// Polled by usb_thread_func(); its event loop runs while this is false.
64 atomic<bool> should_quit;
// Returns the number of bytes one line of the given width occupies in the
// v210-style layout used here: every (started) group of six pixels takes
// four 32-bit words.
int v210_stride(int width)
{
	const int pixel_groups = (width + 5) / 6;  // Round up to whole groups of six.
	return pixel_groups * 4 * sizeof(uint32_t);
}
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
84 if (size % 1024 != 0) {
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
95 int num_iso_pack = xfr->length / size;
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the video format lookup table used by decode_video_format().
// Member order matters: the table rows are written as positional aggregate
// initializers.
struct VideoFormatEntry {
	uint16_t normalized_video_format;  // Format word after masking; the lookup key.
	unsigned width;
	unsigned height;
	unsigned second_field_start;
	unsigned extra_lines_top;
	unsigned extra_lines_bottom;
	unsigned frame_rate_nom;
	unsigned frame_rate_den;
	bool interlaced;  // Ninth column of every table row; read as entry.interlaced.
};
111 // Get details for the given video format; returns false if detection was incomplete.
//
// Fills in *decoded_video_format (id, dimensions, stride, extra lines, frame
// rate, interlacing and has_signal) from the 16-bit format word. Cases are
// handled in this order: the no-signal word 0x0800, words without all of the
// 0xe000 bits, the NTSC and PAL special cases, a lookup table keyed on the
// normalized word, and finally a 720p60 assumption for unknown words.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
// Anything that does not have all three top bits set is not treated as a
// video format: dimensions are zeroed and has_signal is false.
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
// The 0x0800 bit is masked out for the comparison and then used on its own
// to pick the stride: two bytes per pixel when set, v210 stride otherwise.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Table columns, in order: normalized format, width, height,
// second_field_start, extra_lines_top, extra_lines_bottom,
// frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
203 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
204 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
205 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
206 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
207 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
208 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
209 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
210 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
211 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
212 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
213 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
214 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
215 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
// Linear search; the stride again depends on the (un-normalized) 0x0800 bit.
217 for (const VideoFormatEntry &entry : entries) {
218 if (normalized_video_format == entry.normalized_video_format) {
219 decoded_video_format->width = entry.width;
220 decoded_video_format->height = entry.height;
221 if (video_format & 0x0800) {
222 decoded_video_format->stride = entry.width * 2;
224 decoded_video_format->stride = v210_stride(entry.width);
226 decoded_video_format->second_field_start = entry.second_field_start;
227 decoded_video_format->extra_lines_top = entry.extra_lines_top;
228 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
229 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
230 decoded_video_format->frame_rate_den = entry.frame_rate_den;
231 decoded_video_format->interlaced = entry.interlaced;
// No table entry matched: report it and fall back to 8-bit 720p60 values.
236 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
237 decoded_video_format->width = 1280;
238 decoded_video_format->height = 720;
239 decoded_video_format->stride = 1280 * 2;
240 decoded_video_format->frame_rate_nom = 60;
241 decoded_video_format->frame_rate_den = 1;
245 // There are seemingly no direct indicators of sample rate; you just get
246 // one frame's worth and have to guess from that.
247 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
249 size_t num_samples = len / 3 / 8;
250 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
252 // See if we match or are very close to any of the mandatory HDMI sample rates.
253 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
254 for (int rate : candidate_sample_rates) {
255 if (abs(int(num_samples_per_second) - rate) < 50) {
260 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
261 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
267 FrameAllocator::~FrameAllocator() {}
269 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
270 : frame_size(frame_size)
272 for (size_t i = 0; i < num_queued_frames; ++i) {
273 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one buffer from the freelist. If the freelist is empty, a
// "Frame overrun" message is printed instead; callers in this file check the
// returned frame's data pointer for nullptr.
277 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
282 unique_lock<mutex> lock(freelist_mutex); // Meh.
283 if (freelist.empty()) {
284 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() takes the buffer away from the unique_ptr before the (now empty)
// entry is popped, so the pop does not free it.
287 vf.data = freelist.top().release();
288 vf.size = frame_size;
289 freelist.pop(); // Meh.
294 void MallocFrameAllocator::release_frame(Frame frame)
296 if (frame.overflow > 0) {
297 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
299 unique_lock<mutex> lock(freelist_mutex);
300 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if a comes strictly before b on the 16-bit circle, i.e. if
// going forward from a reaches b in fewer than 0x8000 steps (and at least one).
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Forward distance from a to b, modulo 2^16.
	const uint16_t forward_distance = static_cast<uint16_t>(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
// Appends a completed frame to the queue *q (under queue_lock) and wakes up
// a waiter on queues_not_empty. A frame whose timecode does not come after
// the timecode at the back of the queue (with 16-bit wraparound) is reported
// and handed back to its allocator instead.
315 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
317 unique_lock<mutex> lock(queue_lock);
318 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
319 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
320 q->back().timecode, timecode);
321 frame.owner->release_frame(frame);
327 qf.timecode = timecode;
329 q->push_back(move(qf));
330 queues_not_empty.notify_one(); // might be spurious
333 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
335 FILE *fp = fopen(filename, "wb");
336 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
337 printf("short write!\n");
342 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
344 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. It names the thread, runs the init callback if
// dequeue callbacks are set, and then pairs up queued video and audio frames
// by timecode until dequeue_thread_should_quit is set, handing each result to
// frame_callback. The cleanup callback runs on the way out.
347 void BMUSBCapture::dequeue_thread_func()
349 char thread_name[16];
350 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
351 pthread_setname_np(pthread_self(), thread_name);
353 if (has_dequeue_callbacks) {
354 dequeue_init_callback();
// Last sample rate accepted by guess_sample_rate(); starts out at 48 kHz.
356 size_t last_sample_rate = 48000;
357 while (!dequeue_thread_should_quit) {
358 unique_lock<mutex> lock(queue_lock);
// Sleep until asked to quit or until both queues have at least one frame.
359 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
361 if (dequeue_thread_should_quit) break;
363 uint16_t video_timecode = pending_video_frames.front().timecode;
364 uint16_t audio_timecode = pending_audio_frames.front().timecode;
365 AudioFormat audio_format;
366 audio_format.bits_per_sample = 24;
367 audio_format.num_channels = 8;
368 audio_format.sample_rate = last_sample_rate;
// Case 1: the video frame is older than the oldest audio block; drop it.
369 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
370 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
372 QueuedFrame video_frame = pending_video_frames.front();
373 pending_video_frames.pop_front();
375 video_frame_allocator->release_frame(video_frame.frame);
// Case 2: the audio block is older than the oldest video frame; deliver the
// audio together with an empty video frame.
376 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
377 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
379 QueuedFrame audio_frame = pending_audio_frames.front();
380 pending_audio_frames.pop_front();
382 audio_format.id = audio_frame.format;
384 // Use the video format of the pending frame.
// The video frame is only looked at here; it stays in the queue.
385 QueuedFrame video_frame = pending_video_frames.front();
386 VideoFormat video_format;
387 decode_video_format(video_frame.format, &video_format);
389 frame_callback(audio_timecode,
390 FrameAllocator::Frame(), 0, video_format,
391 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Case 3: timecodes match; take both frames off their queues.
393 QueuedFrame video_frame = pending_video_frames.front();
394 QueuedFrame audio_frame = pending_audio_frames.front();
395 pending_audio_frames.pop_front();
396 pending_video_frames.pop_front();
// NOTE(review): `filename` is not declared in the visible lines; this dumping
// code is presumably inside a disabled preprocessor section; confirm.
401 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
402 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
403 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
406 VideoFormat video_format;
407 audio_format.id = audio_frame.format;
// If the format decodes, re-guess the sample rate from the audio length
// (when there is audio) and deliver both frames. Otherwise release the video
// frame and deliver the audio with an empty video frame.
408 if (decode_video_format(video_frame.format, &video_format)) {
409 if (audio_frame.frame.len != 0) {
410 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
411 last_sample_rate = audio_format.sample_rate;
413 frame_callback(video_timecode,
414 video_frame.frame, HEADER_SIZE, video_format,
415 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
417 video_frame_allocator->release_frame(video_frame.frame);
418 audio_format.sample_rate = last_sample_rate;
419 frame_callback(video_timecode,
420 FrameAllocator::Frame(), 0, video_format,
421 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
425 if (has_dequeue_callbacks) {
426 dequeue_cleanup_callback();
// Called when a video sync marker has been found; `start` points at the
// bytes right after it. Reads the 16-bit timecode (bytes 0-1) and format
// (bytes 2-3), both least significant byte first, queues the video frame
// collected so far if it holds any data, and allocates a new current frame.
430 void BMUSBCapture::start_new_frame(const uint8_t *start)
432 uint16_t format = (start[3] << 8) | start[2];
433 uint16_t timecode = (start[1] << 8) | start[0];
435 if (current_video_frame.len > 0) {
436 current_video_frame.received_timestamp = steady_clock::now();
438 // If format is 0x0800 (no signal), add a fake (empty) audio
439 // frame to get it out of the queue.
440 // TODO: Figure out if there are other formats that come with
441 // no audio, and treat them the same.
442 if (format == 0x0800) {
443 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
444 if (fake_audio_frame.data == nullptr) {
445 // Oh well, it's just a no-signal frame anyway.
446 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
447 current_video_frame.owner->release_frame(current_video_frame);
448 current_video_frame = video_frame_allocator->alloc_frame();
451 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
454 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
456 // Update the assumed frame width. We might be one frame too late on format changes,
457 // but it's much better than asking the user to choose manually.
458 VideoFormat video_format;
459 if (decode_video_format(format, &video_format)) {
460 assumed_frame_width = video_format.width;
463 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
465 // //start[7], start[6], start[5], start[4],
466 // read_current_frame, FRAME_SIZE);
468 current_video_frame = video_frame_allocator->alloc_frame();
469 //if (current_video_frame.data == nullptr) {
470 // read_current_frame = -1;
472 // read_current_frame = 0;
476 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
478 uint16_t format = (start[3] << 8) | start[2];
479 uint16_t timecode = (start[1] << 8) | start[0];
480 if (current_audio_frame.len > 0) {
481 current_audio_frame.received_timestamp = steady_clock::now();
482 //dump_audio_block();
483 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
485 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
486 // format, timecode);
487 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet as hex.
// `offset` is where the packet's data starts within xfr->buffer.
491 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
493 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
494 for (unsigned j = 0; j < pack->actual_length; j++) {
495 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
496 printf("%02x", xfr->buffer[j + offset]);
// Something extra happens after every eighth byte; the statement itself is
// not visible here.
499 else if ((j % 8) == 7)
// Splits n bytes from src between the two destinations dest1 and dest2.
// The loop consumes the source two bytes per iteration.
// NOTE(review): presumably even-indexed bytes go to dest1 and odd-indexed
// bytes to dest2, and n is expected to be even; the loop body is not
// visible here, so confirm.
507 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
510 uint8_t *dptr1 = dest1;
511 uint8_t *dptr2 = dest2;
513 for (size_t i = 0; i < n; i += 2) {
// Appends the bytes [start, end) to *current_frame and advances its len.
// Bytes that would not fit within the frame's size are counted in `overflow`
// instead. For interleaved frames, the data is split between data and data2
// via memcpy_interleaved(); otherwise it is copied straight into data.
// frame_type_name is only used in the overflow message.
519 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Frames without a buffer, or that are already past their size, are
// handled first (the remaining conditions and the action are not visible here).
521 if (current_frame->data == nullptr ||
522 current_frame->len > current_frame->size ||
527 int bytes = end - start;
528 if (current_frame->len + bytes > current_frame->size) {
529 current_frame->overflow = current_frame->len + bytes - current_frame->size;
530 current_frame->len = current_frame->size;
// Only overflows of more than 1 MB (1048576 bytes) are printed, and the
// counter is then reset.
531 if (current_frame->overflow > 1048576) {
532 printf("%d bytes overflow after last %s frame\n",
533 int(current_frame->overflow), frame_type_name);
534 current_frame->overflow = 0;
538 if (current_frame->interleaved) {
// Each of the two planes holds half of the bytes written so far.
539 uint8_t *data = current_frame->data + current_frame->len / 2;
540 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// Odd current length and odd byte counts get special handling (details not
// visible here) before the bulk of the data is split.
541 if (current_frame->len % 2 == 1) {
545 if (bytes % 2 == 1) {
548 ++current_frame->len;
551 memcpy_interleaved(data, data2, start, bytes);
552 current_frame->len += bytes;
554 memcpy(current_frame->data + current_frame->len, start, bytes);
555 current_frame->len += bytes;
// Debug helper: prints `name` (left-aligned in a 10-character field) followed
// by all 32 bytes of the AVX2 register n in hex, lowest byte index first.
// Each byte needs its own call because _mm256_extract_epi8 takes its index
// as a compile-time constant.
561 void avx2_dump(const char *name, __m256i n)
563 printf("%-10s:", name);
// Bytes 0-7.
564 printf(" %02x", _mm256_extract_epi8(n, 0));
565 printf(" %02x", _mm256_extract_epi8(n, 1));
566 printf(" %02x", _mm256_extract_epi8(n, 2));
567 printf(" %02x", _mm256_extract_epi8(n, 3));
568 printf(" %02x", _mm256_extract_epi8(n, 4));
569 printf(" %02x", _mm256_extract_epi8(n, 5));
570 printf(" %02x", _mm256_extract_epi8(n, 6));
571 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
573 printf(" %02x", _mm256_extract_epi8(n, 8));
574 printf(" %02x", _mm256_extract_epi8(n, 9));
575 printf(" %02x", _mm256_extract_epi8(n, 10));
576 printf(" %02x", _mm256_extract_epi8(n, 11));
577 printf(" %02x", _mm256_extract_epi8(n, 12));
578 printf(" %02x", _mm256_extract_epi8(n, 13));
579 printf(" %02x", _mm256_extract_epi8(n, 14));
580 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
582 printf(" %02x", _mm256_extract_epi8(n, 16));
583 printf(" %02x", _mm256_extract_epi8(n, 17));
584 printf(" %02x", _mm256_extract_epi8(n, 18));
585 printf(" %02x", _mm256_extract_epi8(n, 19));
586 printf(" %02x", _mm256_extract_epi8(n, 20));
587 printf(" %02x", _mm256_extract_epi8(n, 21));
588 printf(" %02x", _mm256_extract_epi8(n, 22));
589 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
591 printf(" %02x", _mm256_extract_epi8(n, 24));
592 printf(" %02x", _mm256_extract_epi8(n, 25));
593 printf(" %02x", _mm256_extract_epi8(n, 26));
594 printf(" %02x", _mm256_extract_epi8(n, 27));
595 printf(" %02x", _mm256_extract_epi8(n, 28));
596 printf(" %02x", _mm256_extract_epi8(n, 29));
597 printf(" %02x", _mm256_extract_epi8(n, 30));
598 printf(" %02x", _mm256_extract_epi8(n, 31));
603 #ifndef HAS_MULTIVERSIONING
// Version of add_to_frame_fastpath() that is compiled when
// HAS_MULTIVERSIONING is not defined. It has the same signature as the
// multiversioned variants, so decode_packs() can call it unconditionally.
605 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
607 // No fast path possible unless we have multiversioning.
611 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two target-specific copy loops (SSE4.1 and
// AVX2), both of which are defined later in the file and called from
// add_to_frame_fastpath().
613 __attribute__((target("sse4.1")))
614 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
616 __attribute__((target("avx2")))
617 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
619 // Does a memcpy and memchr in one to reduce processing time.
620 // Note that the benefit is somewhat limited if your L3 cache is small,
621 // as you'll (unfortunately) spend most of the time loading the data
624 // Complicated cases are left to the slow path; it basically stops copying
625 // up until the first instance of "sync_char" (usually a bit before, actually).
626 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
627 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// This is the "default" target variant, i.e. the one used when neither of
// the targets named on the other variant applies.
628 __attribute__((target("default")))
629 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
631 // No fast path possible unless we have SSE 4.1 or higher.
// SSE4.1/AVX2 variant of the fast path. It trims [start, limit) down to a
// region the vectorized core can handle, copies the unaligned head through
// add_to_frame(), and then hands the rest to add_to_frame_fastpath_core().
// The return value is the input position where copying stopped.
635 __attribute__((target("sse4.1", "avx2")))
636 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
638 if (current_frame->data == nullptr ||
639 current_frame->len > current_frame->size ||
// Inputs of fewer than 128 bytes get separate treatment (not visible here).
643 size_t orig_bytes = limit - start;
644 if (orig_bytes < 128) {
649 // Don't read more bytes than we can write.
650 limit = min(limit, start + (current_frame->size - current_frame->len));
652 // Align end to 32 bytes.
653 limit = (const uint8_t *)(intptr_t(limit) & ~31);
655 if (start >= limit) {
659 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
660 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
661 if (aligned_start != start) {
// Look for the sync character in the unaligned head. Without one, the whole
// head is added; with one, only the bytes before it are added.
662 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
663 if (sync_start == nullptr) {
664 add_to_frame(current_frame, "", start, aligned_start);
666 add_to_frame(current_frame, "", start, sync_start);
671 // Make the length a multiple of 64.
672 if (current_frame->interleaved) {
673 if (((limit - aligned_start) % 64) != 0) {
676 assert(((limit - aligned_start) % 64) == 0);
679 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 copy loop. Walks the input from aligned_start towards limit in 32-byte
// registers, storing each chunk into the frame and comparing it against
// sync_char. Returns the input position it ended at.
// Interleaved frames consume two registers (64 bytes) per iteration and split
// them between data and data2; other frames are copied one register at a time.
682 __attribute__((target("avx2")))
683 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char replicated into all 32 byte positions, for the comparisons below.
685 const __m256i needle = _mm256_set1_epi8(sync_char);
687 const __restrict __m256i *in = (const __m256i *)aligned_start;
688 if (current_frame->interleaved) {
689 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
690 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
691 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane, this shuffle gathers the even-indexed bytes into
// the low eight positions and the odd-indexed bytes into the high eight.
695 __m256i shuffle_cw = _mm256_set_epi8(
696 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
697 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
698 while (in < (const __m256i *)limit) {
699 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
700 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
701 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// A nonzero `found` means at least one of the 64 bytes equals sync_char.
703 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
704 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
705 __m256i found = _mm256_or_si256(found1, found2);
707 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
708 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
710 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
711 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1 and data2; hi combines the
// high halves.
713 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
714 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
716 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
717 _mm256_storeu_si256(out2, hi);
719 if (!_mm256_testz_si256(found, found)) {
727 current_frame->len += (uint8_t *)in - aligned_start;
729 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
730 while (in < (const __m256i *)limit) {
731 __m256i data = _mm256_load_si256(in);
732 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
733 __m256i found = _mm256_cmpeq_epi8(data, needle);
734 if (!_mm256_testz_si256(found, found)) {
// The new length is derived from how far the output pointer got.
741 current_frame->len = (uint8_t *)out - current_frame->data;
744 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
745 return (const uint8_t *)in;
// SSE4.1 copy loop; same contract as the AVX2 variant but with 16-byte
// registers. Walks the input from aligned_start towards limit, storing each
// chunk into the frame and comparing it against sync_char, and returns the
// input position it ended at. Interleaved frames consume two registers
// (32 bytes) per iteration.
748 __attribute__((target("sse4.1")))
749 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char replicated into all 16 byte positions, for the comparisons below.
751 const __m128i needle = _mm_set1_epi8(sync_char);
753 const __m128i *in = (const __m128i *)aligned_start;
754 if (current_frame->interleaved) {
755 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
756 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
757 if (current_frame->len % 2 == 1) {
761 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
762 while (in < (const __m128i *)limit) {
763 __m128i data1 = _mm_load_si128(in);
764 __m128i data2 = _mm_load_si128(in + 1);
// Treat the input as 16-bit words: masking keeps the low (even-indexed) byte
// of each word, and shifting right by 8 keeps the high (odd-indexed) byte.
// Packing then narrows each set of words back to bytes.
765 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
766 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
767 __m128i data1_hi = _mm_srli_epi16(data1, 8);
768 __m128i data2_hi = _mm_srli_epi16(data2, 8);
769 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
770 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
771 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
772 _mm_storeu_si128(out2, hi);
773 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
774 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
// True if any of the 32 bytes just processed equals sync_char.
775 if (!_mm_testz_si128(found1, found1) ||
776 !_mm_testz_si128(found2, found2)) {
784 current_frame->len += (uint8_t *)in - aligned_start;
786 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
787 while (in < (const __m128i *)limit) {
788 __m128i data = _mm_load_si128(in);
789 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
790 __m128i found = _mm_cmpeq_epi8(data, needle);
791 if (!_mm_testz_si128(found, found)) {
// The new length is derived from how far the output pointer got.
798 current_frame->len = (uint8_t *)out - current_frame->data;
801 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
802 return (const uint8_t *)in;
805 #endif // defined(HAS_MULTIVERSIONING)
// Walks all isochronous packets of a completed transfer and appends their
// payload to *current_frame. Whenever sync_pattern is found in the data, the
// bytes before it are added to the current frame and start_callback is
// invoked with a pointer to the byte right after the pattern.
// frame_type_name is passed on to add_to_frame() for its messages.
807 void decode_packs(const libusb_transfer *xfr,
808 const char *sync_pattern,
810 FrameAllocator::Frame *current_frame,
811 const char *frame_type_name,
812 function<void(const uint8_t *start)> start_callback)
815 for (int i = 0; i < xfr->num_iso_packets; i++) {
816 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
818 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
819 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only actual_length bytes of this packet were received.
824 const uint8_t *start = xfr->buffer + offset;
825 const uint8_t *limit = start + pack->actual_length;
826 while (start < limit) { // Usually runs only one iteration.
// Let the fast path copy as much as it can; only the first character of the
// pattern is passed to it.
827 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
828 if (start == limit) break;
829 assert(start < limit);
// Search the remainder for the full sync pattern.
831 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
832 if (start_next_frame == nullptr) {
833 // add the rest of the buffer
834 add_to_frame(current_frame, frame_type_name, start, limit);
837 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
838 start = start_next_frame + sync_length; // skip sync
839 start_callback(start);
843 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer by their nominal length, not by the
// number of bytes received.
845 offset += pack->length;
// Callback for a finished USB transfer. The BMUSBCapture instance is taken
// from xfr->user_data. Isochronous transfers are decoded into audio or video
// frames, control transfers are treated as register reads, and the transfer
// is submitted again at the end. A transfer whose device has gone away marks
// the card as disconnected instead.
849 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// Any status other than "completed" or "no device" is an error; the transfer
// is freed.
851 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
852 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
853 fprintf(stderr, "error: transfer status %d\n", xfr->status);
854 libusb_free_transfer(xfr);
858 assert(xfr->user_data != nullptr);
859 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
861 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
// Report the disconnect only once, however many transfers come back.
862 if (!usb->disconnected) {
863 fprintf(stderr, "Device went away, stopping transfers.\n");
864 usb->disconnected = true;
865 if (usb->card_disconnected_callback) {
866 usb->card_disconnected_callback();
869 // Don't reschedule the transfer; the loop will stop by itself.
873 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio; the other decode call handles video.
// Each stream has its own sync pattern and start-of-frame callback.
874 if (xfr->endpoint == 0x84) {
875 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
877 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
879 // Update the transfer with the new assumed width, if we're in the process of changing formats.
880 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
883 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
884 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
885 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is only declared in the commented-out line above, so
// the printing code below presumably sits in a disabled preprocessor section;
// confirm.
887 if (setup->wIndex == 44) {
888 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
890 printf("read register %2d: 0x%02x%02x%02x%02x\n",
891 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// around at NUM_BMUSB_REGISTERS.
894 memcpy(usb->register_file + usb->current_register, buf, 4);
895 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
896 if (usb->current_register == 0) {
897 // read through all of them
898 printf("register dump:");
899 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
900 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Set the same transfer up to read the next register.
904 libusb_fill_control_setup(xfr->buffer,
905 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
906 /*index=*/usb->current_register, /*length=*/4);
911 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
912 for (i = 0; i < xfr->actual_length; i++) {
913 printf("%02x", xfr->buffer[i]);
// Hand the transfer back to libusb so data keeps flowing.
923 int rc = libusb_submit_transfer(xfr);
925 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. If a card_connected_callback is registered and the
// device is one of the two supported Blackmagic products (0xbd3b or 0xbd4f),
// the device is passed to that callback. The device is unreferenced when its
// descriptor cannot be read, and at the point after the match check.
930 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
932 if (card_connected_callback != nullptr) {
933 libusb_device_descriptor desc;
934 if (libusb_get_device_descriptor(dev, &desc) < 0) {
935 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
936 libusb_unref_device(dev);
940 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
941 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
942 card_connected_callback(dev); // Callback takes ownership.
946 libusb_unref_device(dev);
950 void BMUSBCapture::usb_thread_func()
953 memset(¶m, 0, sizeof(param));
954 param.sched_priority = 1;
955 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
956 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
958 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
959 while (!should_quit) {
960 timeval sec { 1, 0 };
961 int rc = libusb_handle_events_timeout(nullptr, &sec);
962 if (rc != LIBUSB_SUCCESS)
// One supported card found on the bus. find_all_cards() fills these in from
// { idProduct, bus number, port number, device } and sorts them by the
// product, bus and port members.
969 struct USBCardDevice {
// The libusb device for this card; open_card() unreferences it when done.
972 libusb_device *device;
// Maps a supported USB product ID to a human-readable product name.
// Only the two product IDs that the device-matching code accepts are known;
// anything else is a programming error.
const char *get_product_name(uint16_t product)
{
	switch (product) {
	case 0xbd3b:
		return "Intensity Shuttle";
	case 0xbd4f:
		return "UltraStudio SDI";
	default:
		assert(false);
		return nullptr;
	}
}
987 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
989 const char *product_name = get_product_name(product);
992 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
993 id, bus, port, product_name);
// Enumerates all USB devices and returns the supported Blackmagic cards
// (products 0xbd3b and 0xbd4f), sorted by product, then bus, then port.
// Devices that are not returned are unreferenced here; the returned ones keep
// their reference, since the device list is freed without unreferencing.
997 vector<USBCardDevice> find_all_cards()
999 libusb_device **devices;
1000 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
1001 if (num_devices == -1) {
1002 fprintf(stderr, "Error finding USB devices\n");
1005 vector<USBCardDevice> found_cards;
1006 for (ssize_t i = 0; i < num_devices; ++i) {
1007 libusb_device_descriptor desc;
1008 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1009 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1013 uint8_t bus = libusb_get_bus_number(devices[i]);
1014 uint8_t port = libusb_get_port_number(devices[i]);
// Skip (and unreference) everything that is not one of the two supported products.
1016 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1017 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1018 libusb_unref_device(devices[i]);
1022 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free the list itself without unreferencing the devices.
1024 libusb_free_device_list(devices, 0);
1026 // Sort the devices to get a consistent ordering.
1027 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1028 if (a.product != b.product)
1029 return a.product < b.product;
1031 return a.bus < b.bus;
1032 return a.port < b.port;
// Opens the card with the given index in the sorted list from find_all_cards().
// Prints the description of every card found to stderr, stores the description of
// the selected one in *description, opens it with libusb_open(), and finally drops
// the references to all devices in the list.
// NOTE(review): what follows the error messages (presumably an exit), the condition
// guarding the "Error opening card" message, and the return of devh are not visible here.
1038 libusb_device_handle *open_card(int card_index, string *description)
1040 vector<USBCardDevice> found_cards = find_all_cards();
// List every card so the user can see which indices exist.
1042 for (size_t i = 0; i < found_cards.size(); ++i) {
1043 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1044 fprintf(stderr, "%s\n", tmp_description.c_str());
1045 if (i == size_t(card_index)) {
1046 *description = tmp_description;
// The cast to size_t makes a negative card_index fail this range check as well.
1050 if (size_t(card_index) >= found_cards.size()) {
1051 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1055 libusb_device_handle *devh;
1056 int rc = libusb_open(found_cards[card_index].device, &devh);
1058 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the references that find_all_cards() left on the devices.
1062 for (size_t i = 0; i < found_cards.size(); ++i) {
1063 libusb_unref_device(found_cards[i].device);
// Opens an already-known libusb device (e.g. one delivered through hotplug).
// card_index is only used to build the text stored in *description.
// This function does not unreference dev in the visible lines.
// NOTE(review): what follows the error messages, the condition guarding the
// "Error opening card" message, and the return of devh are not visible here.
1069 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1071 uint8_t bus = libusb_get_bus_number(dev);
1072 uint8_t port = libusb_get_port_number(dev);
1074 libusb_device_descriptor desc;
1075 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1076 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1080 *description = get_card_description(card_index, bus, port, desc.idProduct);
1082 libusb_device_handle *devh;
1083 int rc = libusb_open(dev, &devh);
1085 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
// Counts the supported cards currently attached.
// Initializes the default libusb context, enumerates the cards with find_all_cards(),
// remembers how many there were and releases the device references again.
// NOTE(review): the error-check condition after libusb_init() and the final return
// (presumably of `ret`) are not visible in this excerpt.
1094 unsigned BMUSBCapture::num_cards()
1096 int rc = libusb_init(nullptr);
1098 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1102 vector<USBCardDevice> found_cards = find_all_cards();
1103 unsigned ret = found_cards.size();
// Only the count is needed; drop the references find_all_cards() kept.
1104 for (size_t i = 0; i < found_cards.size(); ++i) {
1105 libusb_unref_device(found_cards[i].device);
// Selects the pixel format to capture in and pushes the new setting to the card
// via update_capture_mode().
1110 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1112 current_pixel_format = pixel_format;
1113 update_capture_mode();
// Opens and initializes the capture card. In the order visible below, it:
//   - installs malloc-backed frame allocators if none were supplied,
//   - starts the dequeue thread,
//   - initializes libusb and opens the device (by index, or from `dev`),
//   - prints the device's interface/alternate-setting/endpoint layout,
//   - selects configuration 1, claims interface 0, cycles its alternate setting
//     1 -> 2 -> 1, and claims interface 3,
//   - pushes the capture mode and accesses a few vendor registers,
//   - allocates the isochronous transfers for endpoints 3 and 4 and stores them in
//     iso_xfrs (start_bm_capture() is what submits them).
// NOTE(review): this excerpt omits many short lines (error-check conditions, braces,
// declarations such as `rc`, and possibly preprocessor guards), so it cannot be told
// from here which of the register-probing sections are actually compiled in or reached.
1116 void BMUSBCapture::configure_card()
// Fall back to our own allocators when the user did not provide any.
1118 if (video_frame_allocator == nullptr) {
1119 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1120 set_video_frame_allocator(owned_video_frame_allocator.get());
1122 if (audio_frame_allocator == nullptr) {
1123 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1124 set_audio_frame_allocator(owned_audio_frame_allocator.get());
1126 dequeue_thread_should_quit = false;
1127 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1130 struct libusb_transfer *xfr;
1132 rc = libusb_init(nullptr);
1134 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// No explicit device: pick the card by index. Otherwise open the supplied device
// and drop our reference to it afterwards.
1138 if (dev == nullptr) {
1139 devh = open_card(card_index, &description);
1141 devh = open_card(card_index, dev, &description);
1142 libusb_unref_device(dev);
1145 fprintf(stderr, "Error finding USB device\n");
1149 libusb_config_descriptor *config;
1150 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1152 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Informational dump of the descriptor tree to stdout.
1157 printf("%d interface\n", config->bNumInterfaces);
1158 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1159 printf(" interface %d\n", interface_number);
1160 const libusb_interface *interface = &config->interface[interface_number];
1161 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1162 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1163 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1164 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1165 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1166 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1172 rc = libusb_set_configuration(devh, /*configuration=*/1);
1174 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1178 rc = libusb_claim_interface(devh, 0);
1180 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1184 // Alternate setting 1 is output, alternate setting 2 is input.
1185 // Card is reset when switching alternates, so the driver uses
1186 // this “double switch” when it wants to reset.
1188 // There's also alternate settings 3 and 4, which seem to be
1189 // like 1 and 2 except they advertise less bandwidth needed.
1190 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1192 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
// Extra hint for the user when the alternate setting does not exist at all.
1193 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1194 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1195 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1196 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1200 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1202 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1206 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1208 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1214 rc = libusb_claim_interface(devh, 3);
1216 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's vendor registers (the numbers are the
// register indices used with requests 214/215 below).
1222 // 44 is some kind of timer register (first 16 bits count upwards)
1223 // 24 is some sort of watchdog?
1224 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1225 // (or will go to 0x73c60010?), also seen 0x73c60100
1226 // 12 also changes all the time, unclear why
1227 // 16 seems to be autodetected mode somehow
1228 // -- this is e00115e0 after reset?
1229 // ed0115e0 after mode change [to output?]
1230 // 2d0015e0 after more mode change [to input]
1231 // ed0115e0 after more mode change
1232 // 2d0015e0 after more mode change
1234 // 390115e0 seems to indicate we have signal
1235 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1237 // 200015e0 on startup
1238 // changes to 250115e0 when we sync to the signal
1240 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1242 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1244 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1245 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1247 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1248 // perhaps some of them are related to analog output?
1250 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1251 // but the driver sets it to 0x8036802a at some point.
1253 // all of this is on request 214/215. other requests (192, 219,
1254 // 222, 223, 224) are used for firmware upgrade. Probably best to
1255 // stay out of it unless you know what you're doing.
1259 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1262 // 0x01 - stable signal
1263 // 0x04 - deep color
1264 // 0x08 - unknown (audio??)
// Send the currently selected input / pixel format to the card now that devh is set.
1268 update_capture_mode();
// Startup register accesses. Judging by how the fields are used below, each entry
// is { endpoint direction, request, register index, data }.
1276 static const ctrl ctrls[] = {
1277 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1278 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1280 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1281 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1282 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1283 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1286 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The data word goes over the wire in network byte order.
1287 uint32_t flipped = htonl(ctrls[req].data);
1288 static uint8_t value[4];
1289 memcpy(value, &flipped, sizeof(flipped));
1290 int size = sizeof(value);
1291 //if (ctrls[req].request == 215) size = 0;
1292 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1293 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1295 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte access of register 16: its last two bytes are reported as
// the firmware version.
1299 if (ctrls[req].index == 16 && rc == 4) {
1300 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
// Hex dump of the bytes transferred.
// NOTE(review): the lines enclosing this dump are not visible; it may be disabled debug output.
1304 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1305 for (int i = 0; i < rc; ++i) {
1306 printf("%02x", value[i]);
// Reads the register with index my_index and prints its contents.
// NOTE(review): the surrounding context (loop? debug-only guard?) is not visible.
1315 static int my_index = 0;
1316 static uint8_t value[4];
1317 int size = sizeof(value);
1318 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1319 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1321 fprintf(stderr, "Error on control\n");
1324 printf("rc=%d index=%d: 0x", rc, my_index);
1325 for (int i = 0; i < rc; ++i) {
1326 printf("%02x", value[i]);
// Three asynchronous register reads follow (timer register 44, register 24, and
// current_register); the first two are submitted, the third has its submit commented out.
// The setup buffers are static so they remain valid after this function returns.
// NOTE(review): any guards around these sections are not visible in this excerpt.
1333 // set up an asynchronous transfer of the timer register
1334 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1335 static int completed = 0;
1337 xfr = libusb_alloc_transfer(0);
1338 libusb_fill_control_setup(cmdbuf,
1339 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1340 /*index=*/44, /*length=*/4);
1341 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
// Overrides the user_data passed to libusb_fill_control_transfer() just above.
1342 xfr->user_data = this;
1343 libusb_submit_transfer(xfr);
1345 // set up an asynchronous transfer of register 24
1346 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1347 static int completed2 = 0;
1349 xfr = libusb_alloc_transfer(0);
1350 libusb_fill_control_setup(cmdbuf2,
1351 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1352 /*index=*/24, /*length=*/4);
1353 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1354 xfr->user_data = this;
1355 libusb_submit_transfer(xfr);
1358 // set up an asynchronous transfer of the register dump
1359 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1360 static int completed3 = 0;
1362 xfr = libusb_alloc_transfer(0);
1363 libusb_fill_control_setup(cmdbuf3,
1364 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1365 /*index=*/current_register, /*length=*/4);
1366 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1367 xfr->user_data = this;
1368 //libusb_submit_transfer(xfr);
1370 //audiofp = fopen("audio.raw", "wb");
// Six transfers are prepared for each of the IN endpoints 3 and 4.
// NOTE(review): the condition that picks between the two sizing branches below
// (one based on USB_VIDEO_TRANSFER_SIZE, one on num_iso_pack * size) is not visible,
// nor is the declaration of buf_size.
1372 // set up isochronous transfers for audio and video
1373 for (int e = 3; e <= 4; ++e) {
1374 int num_transfers = 6;
1375 for (int i = 0; i < num_transfers; ++i) {
1377 int num_iso_pack, size;
1379 // Allocate for minimum width (because that will give us the most
1380 // number of packets, so we don't need to reallocate), but we'll
1381 // default to 720p for the first frame.
1382 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1383 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1384 buf_size = USB_VIDEO_TRANSFER_SIZE;
1388 buf_size = num_iso_pack * size;
1390 int num_bytes = num_iso_pack * size;
1391 assert(size_t(num_bytes) <= buf_size);
// With new enough libusb headers, try libusb_dev_mem_alloc() first; otherwise,
// or if that returns nullptr, warn and fall back to an ordinary heap buffer.
1392 #if LIBUSB_API_VERSION >= 0x01000105
1393 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1395 uint8_t *buf = nullptr;
1397 if (buf == nullptr) {
1398 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1399 #if LIBUSB_API_VERSION >= 0x01000105
1400 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1402 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1404 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
1405 buf = new uint8_t[num_bytes];
1408 xfr = libusb_alloc_transfer(num_iso_pack);
1410 fprintf(stderr, "oom\n");
1414 int ep = LIBUSB_ENDPOINT_IN | e;
1415 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1416 num_iso_pack, cb_xfr, nullptr, 0);
1417 libusb_set_iso_packet_lengths(xfr, size);
1418 xfr->user_data = this;
// Re-slice the transfer into packets sized for the assumed frame width.
// NOTE(review): presumably only done for one of the two endpoints -- the guard is not visible.
1421 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// Keep the transfer for start_bm_capture(), which submits everything in iso_xfrs.
1424 iso_xfrs.push_back(xfr);
// Starts streaming by submitting every isochronous transfer that configure_card()
// stored in iso_xfrs, reporting the endpoint and transfer number on failure.
// NOTE(review): the declaration/increment of `i`, the error-check condition and
// what happens after the error message are not visible in this excerpt.
1429 void BMUSBCapture::start_bm_capture()
1432 for (libusb_transfer *xfr : iso_xfrs) {
1433 int rc = libusb_submit_transfer(xfr);
1436 //printf("num_bytes=%d\n", num_bytes);
1437 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1438 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): the context of the two teardown calls below is not visible; they may
// sit in a disabled block rather than on the normal path -- confirm.
1445 libusb_release_interface(devh, 0);
1449 libusb_exit(nullptr);
// Stops the dequeue thread started in configure_card(): raises its quit flag,
// notifies every waiter on queues_not_empty, and then blocks until the thread
// has finished.
1454 void BMUSBCapture::stop_dequeue_thread()
1456 dequeue_thread_should_quit = true;
1457 queues_not_empty.notify_all();
1458 dequeue_thread.join();
// Starts the USB event thread (usb_thread_func). If a card_connected_callback has
// been set, first registers cb_hotplug for device-arrival events from Blackmagic
// devices of any product and class; with hotplug_existing_devices set, the
// LIBUSB_HOTPLUG_ENUMERATE flag is passed as well.
// NOTE(review): what follows the failure message is not visible in this excerpt.
1461 void BMUSBCapture::start_bm_thread()
1463 // Devices leaving are discovered by seeing the isochronous packets
1464 // coming back with errors, so only care about devices joining.
1465 if (card_connected_callback != nullptr) {
1466 if (libusb_hotplug_register_callback(
1467 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1468 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1469 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1470 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
// Clear the quit flag before the thread starts polling it.
1475 should_quit = false;
1476 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// NOTE(review): the body of this function is not visible in this excerpt. By name it
// is presumably the counterpart of start_bm_thread() (stopping the USB event
// thread) -- confirm against the full file.
1479 void BMUSBCapture::stop_bm_thread()
// Returns the video modes the user can choose from: a single entry with ID 0,
// named "Autodetect" and flagged as autodetecting.
1485 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1487 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1488 VideoMode auto_mode;
1489 auto_mode.name = "Autodetect";
1490 auto_mode.autodetect = true;
1491 return {{ 0, auto_mode }};
// Returns the ID of the active video mode, which is always 0 (the only mode
// that get_available_video_modes() offers).
1494 uint32_t BMUSBCapture::get_current_video_mode() const
1496 return 0; // Matches get_available_video_modes().
// Accepts only video mode ID 0 (checked with an assert); nothing else is done in
// the visible lines, since that is the only mode on offer.
1499 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1501 assert(video_mode_id == 0); // Matches get_available_video_modes().
// Lists the selectable video inputs. The keys are the values accepted by
// set_video_input(); they only use bits within 0x06000000 and are OR-ed into the
// mode word by update_capture_mode().
1504 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1507 { 0x00000000, "HDMI/SDI" },
1508 { 0x02000000, "Component" },
1509 { 0x04000000, "Composite" },
1510 { 0x06000000, "S-video" }
// Selects the video input and pushes the change to the card via update_capture_mode().
// video_input_id must be one of the keys from get_available_video_inputs(); the
// assert rejects any bit outside 0x06000000.
1514 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1516 assert((video_input_id & ~0x06000000) == 0);
1517 current_video_input = video_input_id;
1518 update_capture_mode();
// Lists the selectable audio inputs. The keys are the values accepted by
// set_audio_input(); the only bit in use is 0x10000000, which update_capture_mode()
// OR-s into the mode word.
1521 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1524 { 0x00000000, "Embedded" },
1525 { 0x10000000, "Analog" }
// Selects the audio input and pushes the change to the card via update_capture_mode().
// audio_input_id must be one of the keys from get_available_audio_inputs(); the
// assert rejects any bit other than 0x10000000.
1529 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1531 assert((audio_input_id & ~0x10000000) == 0);
1532 current_audio_input = audio_input_id;
1533 update_capture_mode();
// Writes the current video input, audio input and pixel format to the card as one
// 32-bit mode word, using vendor request 215 on register index 0.
// NOTE(review): the action taken when devh is null (presumably an early return, since
// the setters may be called before the card is opened), the condition guarding the
// error message, and what follows that message are not visible in this excerpt.
1536 void BMUSBCapture::update_capture_mode()
1538 if (devh == nullptr) {
1542 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
// The word is kept in network byte order from here on, which is why the extra bit
// below is also passed through htonl() before being OR-ed in.
1543 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1544 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1545 mode |= htonl(0x20000000);
// Any format other than 8-bit must be 10-bit, which needs no extra bit.
1547 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1550 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1551 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1553 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1558 } // namespace bmusb