1 // Intensity Shuttle USB3 capture driver, v0.7.2
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
// USB vendor ID compared against desc.idVendor when looking for supported cards.
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
// Number of bytes at the start of a video frame that dump_frame() skips and
// that is passed to frame_callback as the video data offset.
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
// Same role for audio blocks (see dump_audio_block() and frame_callback calls).
49 #define AUDIO_HEADER_SIZE 4
// NOTE(review): the two sizes below are not referenced in the visible part of
// this file; presumably they size the frame buffers and USB transfers — confirm.
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Definitions of BMUSBCapture's static members. card_connected_callback is
// invoked from cb_hotplug() for matching devices and starts out unset.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
// NOTE(review): not read anywhere in the visible part of this file.
57 bool BMUSBCapture::hotplug_existing_devices = false;
// Polled by the event loop in usb_thread_func(); the loop stops once it is true.
64 atomic<bool> should_quit;
// Returns the number of bytes in one line of v210-packed video of the given
// width: pixels are counted in groups of six (rounded up), and each group
// takes four 32-bit words.
int v210_stride(int width)
{
	return (width + 5) / 6 * 4 * sizeof(uint32_t);
}
// Computes the isochronous packet size to use for the given pixel format and
// frame width, based on six lines' worth of bytes (v210 stride for
// PixelFormat_10BitYCbCr, two bytes per pixel otherwise).
// NOTE(review): the declaration of `stride`, the statements inside the
// `size % 1024` check and the return are not visible in this excerpt —
// confirm against the full file.
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
// Only sizes that are not already a multiple of 1024 enter this branch.
84 if (size % 1024 != 0) {
// Re-packetizes an isochronous transfer for the given pixel format and width:
// recomputes the per-packet size and, if anything changed, updates the
// transfer's packet count and packet lengths.
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
// As many whole packets as fit into the transfer's buffer length.
95 int num_iso_pack = xfr->length / size;
// Leave the transfer untouched unless the packet count or the length of the
// first packet differs from what was just computed.
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the lookup table used by decode_video_format(): the masked
// ("normalized") format word and the frame geometry / frame rate it maps to.
// NOTE(review): decode_video_format() also reads entry.interlaced and the
// table rows carry a ninth (bool) initializer, but that member's declaration
// and the closing of the struct are not visible in this excerpt.
103 struct VideoFormatEntry {
104 uint16_t normalized_video_format;
105 unsigned width, height, second_field_start;
106 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a fraction: frame_rate_nom / frame_rate_den.
107 unsigned frame_rate_nom, frame_rate_den;
111 // Get details for the given video format; returns false if detection was incomplete.
// Fills *decoded_video_format from the 16-bit format word.
// NOTE(review): the return statements and several brace / else lines of this
// function are not visible in this excerpt, so which paths return true and
// which return false cannot be confirmed from here.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
// Exactly 0x0800 is handled as "no signal": a 720x525 frame at 3013/100 fps
// with has_signal cleared.
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
// Format words without all of the top three bits set are reported as a
// zero-sized, signal-less format at 60/1 fps.
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
// Everything below treats the format word as a real signal.
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
// The 0x0800 bit is ignored for the comparison itself and only used to pick
// the stride afterwards.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
// Bit 0x0800 set: two bytes per pixel. The v210 stride assignment that follows
// is presumably the else branch (the separating line is not visible here).
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
// Same stride selection as for NTSC above.
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// The mask clears bits 0xe000, 0x0800, 0x0008 and 0x0004 before the table lookup.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Row layout: normalized format, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
203 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
204 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
205 { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
206 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
207 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
208 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
209 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
210 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
211 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
212 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
213 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
214 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
215 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
216 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
// Linear search for the normalized format; a hit copies the row into the output.
218 for (const VideoFormatEntry &entry : entries) {
219 if (normalized_video_format == entry.normalized_video_format) {
220 decoded_video_format->width = entry.width;
221 decoded_video_format->height = entry.height;
// The stride is chosen from the original (unmasked) format word's 0x0800 bit.
222 if (video_format & 0x0800) {
223 decoded_video_format->stride = entry.width * 2;
225 decoded_video_format->stride = v210_stride(entry.width);
227 decoded_video_format->second_field_start = entry.second_field_start;
228 decoded_video_format->extra_lines_top = entry.extra_lines_top;
229 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
230 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
231 decoded_video_format->frame_rate_den = entry.frame_rate_den;
232 decoded_video_format->interlaced = entry.interlaced;
// Fallback for formats not in the table: assume 1280x720 at 60/1 fps with a
// two-bytes-per-pixel stride.
237 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
238 decoded_video_format->width = 1280;
239 decoded_video_format->height = 720;
240 decoded_video_format->stride = 1280 * 2;
241 decoded_video_format->frame_rate_nom = 60;
242 decoded_video_format->frame_rate_den = 1;
246 // There are seemingly no direct indicators of sample rate; you just get
247 // one frame's worth and have to guess from that.
248 int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
250 size_t num_samples = len / 3 / 8;
251 size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
253 // See if we match or are very close to any of the mandatory HDMI sample rates.
254 const int candidate_sample_rates[] = { 32000, 44100, 48000 };
255 for (int rate : candidate_sample_rates) {
256 if (abs(int(num_samples_per_second) - rate) <= 100) {
261 fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n",
262 num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
// Out-of-line definition of FrameAllocator's destructor; it does nothing.
268 FrameAllocator::~FrameAllocator() {}
// Pre-allocates num_queued_frames buffers of frame_size bytes each and pushes
// them onto the freelist, so alloc_frame() can hand them out later.
270 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
271 : frame_size(frame_size)
273 for (size_t i = 0; i < num_queued_frames; ++i) {
274 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Hands out one pre-allocated buffer from the freelist, under freelist_mutex.
// An empty freelist is logged as a frame overrun.
// NOTE(review): the declaration of `vf`, the else line and the return are not
// visible in this excerpt; presumably the frame is returned with data left
// unset in the overrun case — confirm.
278 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
283 unique_lock<mutex> lock(freelist_mutex); // Meh.
284 if (freelist.empty()) {
285 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() detaches the buffer from its unique_ptr so the frame carries the
// raw pointer; release_frame() wraps it again.
288 vf.data = freelist.top().release();
289 vf.size = frame_size;
290 freelist.pop(); // Meh.
// Returns a frame's buffer to the freelist. A non-zero overflow count on the
// frame is logged first.
295 void MallocFrameAllocator::release_frame(Frame frame)
297 if (frame.overflow > 0) {
298 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
300 unique_lock<mutex> lock(freelist_mutex);
// Re-wrap the raw pointer so the freelist owns the buffer again.
301 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if a comes strictly before b in 16-bit sequence space, i.e. if
// b is between 1 and 0x7fff steps ahead of a modulo 0x10000. Equal values are
// not "less than" each other.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	if (a == b) {
		return false;
	}
	// The truncated difference equals b - a when a < b and 0x10000 + b - a when
	// b has wrapped around past a.
	return uint16_t(b - a) < 0x8000;
}
// Appends a finished frame to the queue q (video or audio) under queue_lock
// and wakes the dequeue thread. A frame whose timecode does not come after
// the last queued one is logged and handed back to its allocator.
// NOTE(review): the construction of `qf` and the early return after the drop
// are not visible in this excerpt.
316 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
318 unique_lock<mutex> lock(queue_lock);
// "Not strictly later than the previous timecode" also covers a repeated timecode.
319 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
320 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
321 q->back().timecode, timecode);
322 frame.owner->release_frame(frame);
328 qf.timecode = timecode;
330 q->push_back(move(qf));
331 queues_not_empty.notify_one(); // might be spurious
// Debug helper: writes a frame's payload (everything after the first
// HEADER_SIZE bytes) to the named file.
// NOTE(review): in the visible lines the fopen() result is used without a
// null check, and frame_len < HEADER_SIZE would make the unsigned subtraction
// wrap; closing of the file is not visible in this excerpt — confirm.
334 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
336 FILE *fp = fopen(filename, "wb");
337 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
338 printf("short write!\n");
// Debug helper: appends an audio block's payload (everything after the first
// AUDIO_HEADER_SIZE bytes) to audiofp, which is declared outside this excerpt.
// The fwrite() result is not checked.
343 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
345 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread: waits for queued video and audio frames, pairs
// them up by timecode and passes them to frame_callback.
// NOTE(review): several brace / else lines and short statements of this
// function are not visible in this excerpt; the branch structure described in
// the comments below is inferred from the visible conditions.
348 void BMUSBCapture::dequeue_thread_func()
350 char thread_name[16];
351 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
352 pthread_setname_np(pthread_self(), thread_name);
354 if (has_dequeue_callbacks) {
355 dequeue_init_callback();
// Sample rate reported until guess_sample_rate() picks a different one.
357 size_t last_sample_rate = 48000;
358 while (!dequeue_thread_should_quit) {
359 unique_lock<mutex> lock(queue_lock);
// Sleep until asked to quit or until both queues hold at least one frame.
360 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
362 if (dequeue_thread_should_quit) break;
364 uint16_t video_timecode = pending_video_frames.front().timecode;
365 uint16_t audio_timecode = pending_audio_frames.front().timecode;
366 AudioFormat audio_format;
367 audio_format.bits_per_sample = 24;
368 audio_format.num_channels = 8;
369 audio_format.sample_rate = last_sample_rate;
// Oldest video frame is older than the oldest audio block: drop the video frame.
370 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
371 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
373 QueuedFrame video_frame = pending_video_frames.front();
374 pending_video_frames.pop_front();
376 video_frame_allocator->release_frame(video_frame.frame);
// Oldest audio block is older than the oldest video frame: deliver the audio
// together with an empty video frame.
377 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
378 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
380 QueuedFrame audio_frame = pending_audio_frames.front();
381 pending_audio_frames.pop_front();
383 audio_format.id = audio_frame.format;
385 // Use the video format of the pending frame.
// The pending video frame is only inspected here, not removed from its queue.
386 QueuedFrame video_frame = pending_video_frames.front();
387 VideoFormat video_format;
388 decode_video_format(video_frame.format, &video_format);
390 frame_callback(audio_timecode,
391 FrameAllocator::Frame(), 0, video_format,
392 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Presumably the matching-timecode case (the `else` line is not visible
// here): both frames are taken off their queues.
394 QueuedFrame video_frame = pending_video_frames.front();
395 QueuedFrame audio_frame = pending_audio_frames.front();
396 pending_audio_frames.pop_front();
397 pending_video_frames.pop_front();
// NOTE(review): the next three lines look like debug dumping; the enclosing
// conditional and the declaration of `filename` are not visible — confirm.
402 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
403 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
404 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
407 VideoFormat video_format;
408 audio_format.id = audio_frame.format;
// Successful decode: deliver video and audio together, updating the sample
// rate guess when the audio frame has data.
409 if (decode_video_format(video_frame.format, &video_format)) {
410 if (audio_frame.frame.len != 0) {
411 audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
412 last_sample_rate = audio_format.sample_rate;
414 frame_callback(video_timecode,
415 video_frame.frame, HEADER_SIZE, video_format,
416 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Presumably the decode-failed branch: the video frame is released and only
// the audio is delivered, at the last known sample rate.
418 video_frame_allocator->release_frame(video_frame.frame);
419 audio_format.sample_rate = last_sample_rate;
420 frame_callback(video_timecode,
421 FrameAllocator::Frame(), 0, video_format,
422 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
426 if (has_dequeue_callbacks) {
427 dequeue_cleanup_callback();
// Called when a video sync marker has been found; `start` points just past the
// marker. Queues the video frame collected so far (if it has any data) and
// allocates a fresh frame for what follows.
// NOTE(review): some brace lines and short statements (e.g. the else/return
// around the fake-audio handling) are not visible in this excerpt.
431 void BMUSBCapture::start_new_frame(const uint8_t *start)
// Both fields are read as little-endian 16-bit values: timecode from bytes
// 0-1, format from bytes 2-3.
433 uint16_t format = (start[3] << 8) | start[2];
434 uint16_t timecode = (start[1] << 8) | start[0];
436 if (current_video_frame.len > 0) {
437 current_video_frame.received_timestamp = steady_clock::now();
439 // If format is 0x0800 (no signal), add a fake (empty) audio
440 // frame to get it out of the queue.
441 // TODO: Figure out if there are other formats that come with
442 // no audio, and treat them the same.
443 if (format == 0x0800) {
444 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
445 if (fake_audio_frame.data == nullptr) {
446 // Oh well, it's just a no-signal frame anyway.
447 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
448 current_video_frame.owner->release_frame(current_video_frame);
449 current_video_frame = video_frame_allocator->alloc_frame();
452 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
455 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
457 // Update the assumed frame width. We might be one frame too late on format changes,
458 // but it's much better than asking the user to choose manually.
459 VideoFormat video_format;
460 if (decode_video_format(format, &video_format)) {
461 assumed_frame_width = video_format.width;
464 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
466 // //start[7], start[6], start[5], start[4],
467 // read_current_frame, FRAME_SIZE);
469 current_video_frame = video_frame_allocator->alloc_frame();
470 //if (current_video_frame.data == nullptr) {
471 // read_current_frame = -1;
473 // read_current_frame = 0;
// Called when an audio sync marker has been found; `start` points just past
// the marker. Queues the audio block collected so far (if it has any data)
// and allocates a fresh one.
477 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Same header layout as for video: little-endian timecode in bytes 0-1,
// format in bytes 2-3.
479 uint16_t format = (start[3] << 8) | start[2];
480 uint16_t timecode = (start[1] << 8) | start[0];
481 if (current_audio_frame.len > 0) {
482 current_audio_frame.received_timestamp = steady_clock::now();
483 //dump_audio_block();
484 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
486 //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
487 // format, timecode);
488 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debug helper: prints the received bytes of one isochronous packet in hex,
// reading them from the transfer buffer starting at `offset`.
// NOTE(review): the statements attached to the `j % 8` condition (presumably
// separators / line breaks) are not visible in this excerpt.
492 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
494 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
495 for (unsigned j = 0; j < pack->actual_length; j++) {
496 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
497 printf("%02x", xfr->buffer[j + offset]);
500 else if ((j % 8) == 7)
// Copies n bytes from src into two destination buffers, consuming the source
// two bytes per iteration.
// NOTE(review): the loop body is not visible in this excerpt; presumably the
// first byte of each pair goes to dest1 and the second to dest2 — confirm.
508 void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
511 uint8_t *dptr1 = dest1;
512 uint8_t *dptr2 = dest2;
514 for (size_t i = 0; i < n; i += 2) {
// Slow-path append: copies the bytes [start, end) onto the end of
// *current_frame, tracking overflow when the frame's buffer is too small.
// frame_type_name is only used in the overflow log message.
// NOTE(review): parts of the initial validity check, the handling of odd
// lengths in the interleaved case and several brace / else / return lines are
// not visible in this excerpt.
520 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
522 if (current_frame->data == nullptr ||
523 current_frame->len > current_frame->size ||
528 int bytes = end - start;
// Not enough room: record how many bytes did not fit and mark the frame full.
529 if (current_frame->len + bytes > current_frame->size) {
530 current_frame->overflow = current_frame->len + bytes - current_frame->size;
531 current_frame->len = current_frame->size;
// Overflows above 1 MiB (1048576 bytes) are logged and the counter is reset.
532 if (current_frame->overflow > 1048576) {
533 printf("%d bytes overflow after last %s frame\n",
534 int(current_frame->overflow), frame_type_name);
535 current_frame->overflow = 0;
// Interleaved frames are split over two buffers (data and data2), each
// receiving half of the bytes; len counts the combined total.
539 if (current_frame->interleaved) {
540 uint8_t *data = current_frame->data + current_frame->len / 2;
541 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
542 if (current_frame->len % 2 == 1) {
546 if (bytes % 2 == 1) {
549 ++current_frame->len;
552 memcpy_interleaved(data, data2, start, bytes);
553 current_frame->len += bytes;
// Non-interleaved frames are a plain contiguous copy.
555 memcpy(current_frame->data + current_frame->len, start, bytes);
556 current_frame->len += bytes;
// Debug helper: prints `name` (left-aligned, padded to 10 characters) followed
// by all 32 bytes of an AVX2 register in hex, extracted one byte at a time.
// NOTE(review): lines between the groups of eight bytes (presumably separator
// output) are not visible in this excerpt.
562 void avx2_dump(const char *name, __m256i n)
564 printf("%-10s:", name);
565 printf(" %02x", _mm256_extract_epi8(n, 0));
566 printf(" %02x", _mm256_extract_epi8(n, 1));
567 printf(" %02x", _mm256_extract_epi8(n, 2));
568 printf(" %02x", _mm256_extract_epi8(n, 3));
569 printf(" %02x", _mm256_extract_epi8(n, 4));
570 printf(" %02x", _mm256_extract_epi8(n, 5));
571 printf(" %02x", _mm256_extract_epi8(n, 6));
572 printf(" %02x", _mm256_extract_epi8(n, 7));
574 printf(" %02x", _mm256_extract_epi8(n, 8));
575 printf(" %02x", _mm256_extract_epi8(n, 9));
576 printf(" %02x", _mm256_extract_epi8(n, 10));
577 printf(" %02x", _mm256_extract_epi8(n, 11));
578 printf(" %02x", _mm256_extract_epi8(n, 12));
579 printf(" %02x", _mm256_extract_epi8(n, 13));
580 printf(" %02x", _mm256_extract_epi8(n, 14));
581 printf(" %02x", _mm256_extract_epi8(n, 15));
583 printf(" %02x", _mm256_extract_epi8(n, 16));
584 printf(" %02x", _mm256_extract_epi8(n, 17));
585 printf(" %02x", _mm256_extract_epi8(n, 18));
586 printf(" %02x", _mm256_extract_epi8(n, 19));
587 printf(" %02x", _mm256_extract_epi8(n, 20));
588 printf(" %02x", _mm256_extract_epi8(n, 21));
589 printf(" %02x", _mm256_extract_epi8(n, 22));
590 printf(" %02x", _mm256_extract_epi8(n, 23));
592 printf(" %02x", _mm256_extract_epi8(n, 24));
593 printf(" %02x", _mm256_extract_epi8(n, 25));
594 printf(" %02x", _mm256_extract_epi8(n, 26));
595 printf(" %02x", _mm256_extract_epi8(n, 27));
596 printf(" %02x", _mm256_extract_epi8(n, 28));
597 printf(" %02x", _mm256_extract_epi8(n, 29));
598 printf(" %02x", _mm256_extract_epi8(n, 30));
599 printf(" %02x", _mm256_extract_epi8(n, 31));
604 #ifndef HAS_MULTIVERSIONING
// Fallback used when HAS_MULTIVERSIONING is not defined.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns `start` unchanged so the caller's slow path handles everything.
606 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
608 // No fast path possible unless we have multiversioning.
612 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two per-target versions of the fast-path inner
// loop (same signature, one per `target` attribute); both are defined further
// down in this file.
614 __attribute__((target("sse4.1")))
615 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
617 __attribute__((target("avx2")))
618 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
620 // Does a memcpy and memchr in one to reduce processing time.
621 // Note that the benefit is somewhat limited if your L3 cache is small,
622 // as you'll (unfortunately) spend most of the time loading the data
625 // Complicated cases are left to the slow path; it basically stops copying
626 // up until the first instance of "sync_char" (usually a bit before, actually).
627 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
628 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// This is the "default" target version, used when neither SSE 4.1 nor AVX2 is
// selected.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns `start` unchanged — confirm.
629 __attribute__((target("default")))
630 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
632 // No fast path possible unless we have SSE 4.1 or higher.
// SSE 4.1 / AVX2 version of the fast path. It validates the frame, trims the
// input range to an aligned window, copies any unaligned head through
// add_to_frame(), and hands the aligned remainder to
// add_to_frame_fastpath_core(). The return value is passed through from the
// core routine on the last line.
// NOTE(review): the bodies of most early-exit checks (and what they return)
// are not visible in this excerpt.
636 __attribute__((target("sse4.1", "avx2")))
637 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
639 if (current_frame->data == nullptr ||
640 current_frame->len > current_frame->size ||
// Inputs shorter than 128 bytes take a separate branch (body not visible).
644 size_t orig_bytes = limit - start;
645 if (orig_bytes < 128) {
650 // Don't read more bytes than we can write.
651 limit = min(limit, start + (current_frame->size - current_frame->len));
653 // Align end to 32 bytes.
654 limit = (const uint8_t *)(intptr_t(limit) & ~31);
656 if (start >= limit) {
660 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
661 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
662 if (aligned_start != start) {
// If the sync byte shows up in the unaligned head, only the bytes before it
// are copied.
663 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
664 if (sync_start == nullptr) {
665 add_to_frame(current_frame, "", start, aligned_start);
667 add_to_frame(current_frame, "", start, sync_start);
672 // Make the length a multiple of 64.
673 if (current_frame->interleaved) {
674 if (((limit - aligned_start) % 64) != 0) {
677 assert(((limit - aligned_start) % 64) == 0);
680 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 inner loop of the fast path: copies aligned input into the frame in
// 256-bit chunks while checking each chunk for sync_char. Returns the input
// position reached.
// NOTE(review): the pointer increments, the bodies of the "found" checks and
// the handling of an odd current length are not visible in this excerpt.
683 __attribute__((target("avx2")))
684 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// sync_char replicated into every byte lane, for the byte-wise compares below.
686 const __m256i needle = _mm256_set1_epi8(sync_char);
688 const __restrict __m256i *in = (const __m256i *)aligned_start;
689 if (current_frame->interleaved) {
690 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
691 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
692 if (current_frame->len % 2 == 1) {
// Within each 128-bit lane, this shuffle gathers the even-indexed bytes into
// the low half and the odd-indexed bytes into the high half.
696 __m256i shuffle_cw = _mm256_set_epi8(
697 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
698 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
699 while (in < (const __m256i *)limit) {
700 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
701 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
702 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
704 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
705 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
706 __m256i found = _mm256_or_si256(found1, found2);
708 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
709 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
711 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
712 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// lo combines the low 128-bit halves of data1/data2, hi the high halves.
714 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
715 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
717 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
718 _mm256_storeu_si256(out2, hi);
// Non-zero `found` means at least one byte in this pair of chunks equals sync_char.
720 if (!_mm256_testz_si256(found, found)) {
728 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved case: straight 32-byte copy with the same sync-byte check.
730 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
731 while (in < (const __m256i *)limit) {
732 __m256i data = _mm256_load_si256(in);
733 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
734 __m256i found = _mm256_cmpeq_epi8(data, needle);
735 if (!_mm256_testz_si256(found, found)) {
742 current_frame->len = (uint8_t *)out - current_frame->data;
745 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
746 return (const uint8_t *)in;
// SSE 4.1 inner loop of the fast path: same job as the AVX2 version but with
// 128-bit chunks. Returns the input position reached.
// NOTE(review): the pointer increments, the bodies of the "found" checks and
// the handling of an odd current length are not visible in this excerpt.
749 __attribute__((target("sse4.1")))
750 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
752 const __m128i needle = _mm_set1_epi8(sync_char);
754 const __m128i *in = (const __m128i *)aligned_start;
755 if (current_frame->interleaved) {
756 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
757 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
758 if (current_frame->len % 2 == 1) {
// De-interleave by treating the input as 16-bit words: the mask keeps each
// word's low byte, the shift extracts its high byte, and packus narrows the
// words of two chunks back into one register of bytes.
762 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
763 while (in < (const __m128i *)limit) {
764 __m128i data1 = _mm_load_si128(in);
765 __m128i data2 = _mm_load_si128(in + 1);
766 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
767 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
768 __m128i data1_hi = _mm_srli_epi16(data1, 8);
769 __m128i data2_hi = _mm_srli_epi16(data2, 8);
770 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
771 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
772 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
773 _mm_storeu_si128(out2, hi);
774 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
775 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
// Either chunk containing sync_char triggers this branch.
776 if (!_mm_testz_si128(found1, found1) ||
777 !_mm_testz_si128(found2, found2)) {
785 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved case: straight 16-byte copy with the same sync-byte check.
787 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
788 while (in < (const __m128i *)limit) {
789 __m128i data = _mm_load_si128(in);
790 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
791 __m128i found = _mm_cmpeq_epi8(data, needle);
792 if (!_mm_testz_si128(found, found)) {
799 current_frame->len = (uint8_t *)out - current_frame->data;
802 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
803 return (const uint8_t *)in;
806 #endif // defined(HAS_MULTIVERSIONING)
// Walks all isochronous packets of a completed transfer, appending their
// payload to *current_frame and calling start_callback each time the sync
// pattern is found. start_callback receives a pointer to the bytes right
// after the pattern.
// NOTE(review): the `sync_length` parameter line, the declaration of `offset`
// and several brace / break / continue lines are not visible in this excerpt.
808 void decode_packs(const libusb_transfer *xfr,
809 const char *sync_pattern,
811 FrameAllocator::Frame *current_frame,
812 const char *frame_type_name,
813 function<void(const uint8_t *start)> start_callback)
816 for (int i = 0; i < xfr->num_iso_packets; i++) {
817 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
819 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
820 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
825 const uint8_t *start = xfr->buffer + offset;
826 const uint8_t *limit = start + pack->actual_length;
827 while (start < limit) { // Usually runs only one iteration.
// The fast path copies as much as it can and stops around the first byte equal
// to the sync pattern's first character; the rest is handled below.
828 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
829 if (start == limit) break;
830 assert(start < limit);
832 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
833 if (start_next_frame == nullptr) {
834 // add the rest of the buffer
835 add_to_frame(current_frame, frame_type_name, start, limit);
// Pattern found: finish the current frame with the bytes before it, then
// notify the caller that a new frame/block starts after it.
838 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
839 start = start_next_frame + sync_length; // skip sync
840 start_callback(start);
844 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer by their allotted length, not by the
// number of bytes actually received.
846 offset += pack->length;
// libusb transfer completion callback. Decodes isochronous audio/video data,
// handles register-read control transfers, and resubmits the transfer.
// NOTE(review): many brace / else / return lines and some conditional
// compilation lines are not visible in this excerpt; e.g. `setup` is used
// below while its only visible declaration is commented out, so that code is
// presumably compiled out — confirm.
850 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
// Any status other than "completed" or "no device" is logged and the transfer
// is freed.
852 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
853 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
854 fprintf(stderr, "error: transfer status %d\n", xfr->status);
855 libusb_free_transfer(xfr);
859 assert(xfr->user_data != nullptr);
860 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
// Device unplugged: flag it once and notify the disconnect callback if one is set.
862 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
863 if (!usb->disconnected) {
864 fprintf(stderr, "Device went away, stopping transfers.\n");
865 usb->disconnected = true;
866 if (usb->card_disconnected_callback) {
867 usb->card_disconnected_callback();
870 // Don't reschedule the transfer; the loop will stop by itself.
874 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 carries audio; the other decode_packs call (presumably the
// else branch) handles video.
875 if (xfr->endpoint == 0x84) {
876 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
878 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
880 // Update the transfer with the new assumed width, if we're in the process of changing formats.
881 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
884 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
885 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
886 uint8_t *buf = libusb_control_transfer_get_data(xfr);
888 if (setup->wIndex == 44) {
889 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
891 printf("read register %2d: 0x%02x%02x%02x%02x\n",
892 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and advance to the next register, wrapping
// at NUM_BMUSB_REGISTERS.
895 memcpy(usb->register_file + usb->current_register, buf, 4);
896 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
897 if (usb->current_register == 0) {
898 // read through all of them
899 printf("register dump:");
900 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
901 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Re-arm the control transfer to read the next register.
905 libusb_fill_control_setup(xfr->buffer,
906 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
907 /*index=*/usb->current_register, /*length=*/4);
912 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
913 for (i = 0; i < xfr->actual_length; i++) {
914 printf("%02x", xfr->buffer[i]);
924 int rc = libusb_submit_transfer(xfr);
926 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. When a card_connected_callback is registered,
// devices with a supported vendor/product ID are passed to it; other devices
// are unreferenced.
// NOTE(review): the return statements and brace lines are not visible in this
// excerpt, so the returned values (and their meaning to libusb) cannot be
// confirmed from here.
931 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
933 if (card_connected_callback != nullptr) {
934 libusb_device_descriptor desc;
935 if (libusb_get_device_descriptor(dev, &desc) < 0) {
936 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
937 libusb_unref_device(dev);
// Same two product IDs that get_product_name() and find_all_cards() recognize.
941 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
942 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
943 card_connected_callback(dev); // Callback takes ownership.
947 libusb_unref_device(dev);
951 void BMUSBCapture::usb_thread_func()
954 memset(¶m, 0, sizeof(param));
955 param.sched_priority = 1;
956 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
957 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
959 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
960 while (!should_quit) {
961 timeval sec { 1, 0 };
962 int rc = libusb_handle_events_timeout(nullptr, &sec);
963 if (rc != LIBUSB_SUCCESS)
// One detected capture card, as collected by find_all_cards().
// NOTE(review): only the `device` member is visible in this excerpt;
// find_all_cards() also initializes and sorts by `product`, `bus` and `port`,
// whose declarations are presumably on lines not shown here.
970 struct USBCardDevice {
973 libusb_device *device;
// Maps a USB product ID to a human-readable product name.
// NOTE(review): the handling of unknown product IDs is not visible in this
// excerpt — confirm what is returned in that case.
976 const char *get_product_name(uint16_t product)
978 if (product == 0xbd3b) {
979 return "Intensity Shuttle";
980 } else if (product == 0xbd4f) {
981 return "UltraStudio SDI";
// Formats a one-line description of a card from its index, bus number, port
// number and product name.
// NOTE(review): the declaration of `buf` and the return are not visible in
// this excerpt. The message labels the `port` argument as "Device".
988 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
990 const char *product_name = get_product_name(product);
993 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
994 id, bus, port, product_name);
// Enumerates all USB devices and returns the supported capture cards, sorted
// by product, then bus, then port so the ordering is stable between runs.
// Devices that do not match are unreferenced; the list itself is freed
// without unreferencing (second argument 0), so the returned entries keep
// their device references.
// NOTE(review): error-path statements, brace lines and the return are not
// visible in this excerpt.
998 vector<USBCardDevice> find_all_cards()
1000 libusb_device **devices;
1001 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
// NOTE(review): only -1 is treated as failure here; libusb error codes are
// negative in general, so `< 0` may be what is intended — confirm.
1002 if (num_devices == -1) {
1003 fprintf(stderr, "Error finding USB devices\n");
1006 vector<USBCardDevice> found_cards;
1007 for (ssize_t i = 0; i < num_devices; ++i) {
1008 libusb_device_descriptor desc;
1009 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
1010 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
1014 uint8_t bus = libusb_get_bus_number(devices[i]);
1015 uint8_t port = libusb_get_port_number(devices[i]);
// Skip (and unreference) anything that is not one of the two supported products.
1017 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
1018 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
1019 libusb_unref_device(devices[i]);
1023 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
1025 libusb_free_device_list(devices, 0);
1027 // Sort the devices to get a consistent ordering.
1028 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
1029 if (a.product != b.product)
1030 return a.product < b.product;
1032 return a.bus < b.bus;
1033 return a.port < b.port;
1039 libusb_device_handle *open_card(int card_index, string *description)
1041 vector<USBCardDevice> found_cards = find_all_cards();
1043 for (size_t i = 0; i < found_cards.size(); ++i) {
1044 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1045 fprintf(stderr, "%s\n", tmp_description.c_str());
1046 if (i == size_t(card_index)) {
1047 *description = tmp_description;
1051 if (size_t(card_index) >= found_cards.size()) {
1052 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1056 libusb_device_handle *devh;
1057 int rc = libusb_open(found_cards[card_index].device, &devh);
1059 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
1063 for (size_t i = 0; i < found_cards.size(); ++i) {
1064 libusb_unref_device(found_cards[i].device);
1070 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1072 uint8_t bus = libusb_get_bus_number(dev);
1073 uint8_t port = libusb_get_port_number(dev);
1075 libusb_device_descriptor desc;
1076 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1077 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1081 *description = get_card_description(card_index, bus, port, desc.idProduct);
1083 libusb_device_handle *devh;
1084 int rc = libusb_open(dev, &devh);
1086 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
1095 unsigned BMUSBCapture::num_cards()
1097 int rc = libusb_init(nullptr);
1099 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1103 vector<USBCardDevice> found_cards = find_all_cards();
1104 unsigned ret = found_cards.size();
1105 for (size_t i = 0; i < found_cards.size(); ++i) {
1106 libusb_unref_device(found_cards[i].device);
1111 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1113 current_pixel_format = pixel_format;
1114 update_capture_mode();
// Prepares the card for capture. Going by the lines visible here, it:
// installs default MallocFrameAllocator instances when no allocators have
// been set, starts the dequeue thread, initializes libusb, opens the device
// (by card_index, or from the libusb_device in `dev`), prints the interface
// layout, selects configuration 1, claims interfaces 0 and 3, switches
// alternate settings on interface 0, pushes the capture mode, issues vendor
// control transfers (requests 214/215), and allocates the isochronous
// transfers for endpoints 3 and 4, collecting them in iso_xfrs.
//
// NOTE(review): this listing omits lines (braces, error-path statements and
// possibly preprocessor conditionals), so which of the statements below are
// actually compiled in cannot be determined from here -- confirm against the
// complete file before relying on this summary.
1117 void BMUSBCapture::configure_card()
// Fall back to internally owned allocators if the user has not set any.
1119 if (video_frame_allocator == nullptr) {
1120 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1121 set_video_frame_allocator(owned_video_frame_allocator.get());
1123 if (audio_frame_allocator == nullptr) {
1124 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1125 set_audio_frame_allocator(owned_audio_frame_allocator.get());
// Start the thread that runs dequeue_thread_func().
1127 dequeue_thread_should_quit = false;
1128 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1131 struct libusb_transfer *xfr;
1133 rc = libusb_init(nullptr);
1135 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Open by index when no device was supplied; otherwise open the supplied
// device and drop our reference to it afterwards.
1139 if (dev == nullptr) {
1140 devh = open_card(card_index, &description);
1142 devh = open_card(card_index, dev, &description);
1143 libusb_unref_device(dev);
1146 fprintf(stderr, "Error finding USB device\n");
// Debug dump of the interfaces, alternate settings and endpoint addresses
// of configuration index 0.
1150 libusb_config_descriptor *config;
1151 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1153 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
1158 printf("%d interface\n", config->bNumInterfaces);
1159 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1160 printf(" interface %d\n", interface_number);
1161 const libusb_interface *interface = &config->interface[interface_number];
1162 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1163 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1164 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1165 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1166 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1167 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1173 rc = libusb_set_configuration(devh, /*configuration=*/1);
1175 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1179 rc = libusb_claim_interface(devh, 0);
1181 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1185 // Alternate setting 1 is output, alternate setting 2 is input.
1186 // Card is reset when switching alternates, so the driver uses
1187 // this “double switch” when it wants to reset.
1189 // There's also alternate settings 3 and 4, which seem to be
1190 // like 1 and 2 except they advertise less bandwidth needed.
1191 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1193 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1194 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1195 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1196 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1197 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1201 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1203 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1207 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1209 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1215 rc = libusb_claim_interface(devh, 3);
1217 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
1223 // 44 is some kind of timer register (first 16 bits count upwards)
1224 // 24 is some sort of watchdog?
1225 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1226 // (or will go to 0x73c60010?), also seen 0x73c60100
1227 // 12 also changes all the time, unclear why
1228 // 16 seems to be autodetected mode somehow
1229 // -- this is e00115e0 after reset?
1230 // ed0115e0 after mode change [to output?]
1231 // 2d0015e0 after more mode change [to input]
1232 // ed0115e0 after more mode change
1233 // 2d0015e0 after more mode change
1235 // 390115e0 seems to indicate we have signal
1236 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1238 // 200015e0 on startup
1239 // changes to 250115e0 when we sync to the signal
1241 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1243 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1245 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1246 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1248 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1249 // perhaps some of them are related to analog output?
1251 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1252 // but the driver sets it to 0x8036802a at some point.
1254 // all of this is on request 214/215. other requests (192, 219,
1255 // 222, 223, 224) are used for firmware upgrade. Probably best to
1256 // stay out of it unless you know what you're doing.
1260 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1263 // 0x01 - stable signal
1264 // 0x04 - deep color
1265 // 0x08 - unknown (audio??)
1269 update_capture_mode();
// Table of vendor control transfers issued in order by the loop below; each
// entry is { direction, request, index, data }, as used by that loop.
1277 static const ctrl ctrls[] = {
1278 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1279 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1281 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1282 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1283 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1284 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1287 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The payload is sent in network (big-endian) byte order.
1288 uint32_t flipped = htonl(ctrls[req].data);
1289 static uint8_t value[4];
1290 memcpy(value, &flipped, sizeof(flipped));
1291 int size = sizeof(value);
1292 //if (ctrls[req].request == 215) size = 0;
1293 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1294 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1296 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte reply on index 16 carries the firmware version in its
// last two bytes.
1300 if (ctrls[req].index == 16 && rc == 4) {
1301 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1305 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1306 for (int i = 0; i < rc; ++i) {
1307 printf("%02x", value[i]);
// NOTE(review): the register-read snippets from here down to the isochronous
// setup look like debugging aids; the listing does not show whether they are
// enabled -- confirm.
1316 static int my_index = 0;
1317 static uint8_t value[4];
1318 int size = sizeof(value);
1319 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1320 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1322 fprintf(stderr, "Error on control\n");
1325 printf("rc=%d index=%d: 0x", rc, my_index);
1326 for (int i = 0; i < rc; ++i) {
1327 printf("%02x", value[i]);
1334 // set up an asynchronous transfer of the timer register
1335 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1336 static int completed = 0;
1338 xfr = libusb_alloc_transfer(0);
1339 libusb_fill_control_setup(cmdbuf,
1340 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1341 /*index=*/44, /*length=*/4);
1342 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1343 xfr->user_data = this;
1344 libusb_submit_transfer(xfr);
1346 // set up an asynchronous transfer of register 24
1347 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1348 static int completed2 = 0;
1350 xfr = libusb_alloc_transfer(0);
1351 libusb_fill_control_setup(cmdbuf2,
1352 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1353 /*index=*/24, /*length=*/4);
1354 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1355 xfr->user_data = this;
1356 libusb_submit_transfer(xfr);
1359 // set up an asynchronous transfer of the register dump
1360 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1361 static int completed3 = 0;
1363 xfr = libusb_alloc_transfer(0);
1364 libusb_fill_control_setup(cmdbuf3,
1365 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1366 /*index=*/current_register, /*length=*/4);
1367 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1368 xfr->user_data = this;
1369 //libusb_submit_transfer(xfr);
1371 //audiofp = fopen("audio.raw", "wb");
1373 // set up isochronous transfers for audio and video
// Six transfers are prepared for each of endpoints 3 and 4.
1374 for (int e = 3; e <= 4; ++e) {
1375 int num_transfers = 6;
1376 for (int i = 0; i < num_transfers; ++i) {
1378 int num_iso_pack, size;
1380 // Allocate for minimum width (because that will give us the most
1381 // number of packets, so we don't need to reallocate), but we'll
1382 // default to 720p for the first frame.
1383 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1384 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1385 buf_size = USB_VIDEO_TRANSFER_SIZE;
1389 buf_size = num_iso_pack * size;
1391 int num_bytes = num_iso_pack * size;
1392 assert(size_t(num_bytes) <= buf_size);
// Prefer device memory from libusb_dev_mem_alloc() where the libusb headers
// provide it; otherwise (or if it fails) use a plain heap buffer.
1393 #if LIBUSB_API_VERSION >= 0x01000105
1394 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1396 uint8_t *buf = nullptr;
1398 if (buf == nullptr) {
1399 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1400 #if LIBUSB_API_VERSION >= 0x01000105
1401 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1403 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1405 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
1406 buf = new uint8_t[num_bytes];
1409 xfr = libusb_alloc_transfer(num_iso_pack);
1411 fprintf(stderr, "oom\n");
1415 int ep = LIBUSB_ENDPOINT_IN | e;
1416 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1417 num_iso_pack, cb_xfr, nullptr, 0);
1418 libusb_set_iso_packet_lengths(xfr, size);
1419 xfr->user_data = this;
// Re-split the transfer into packets sized for the assumed frame width.
1422 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// The transfer is only queued here; submission happens in start_bm_capture().
1425 iso_xfrs.push_back(xfr);
// Submits every transfer collected in iso_xfrs to libusb. When a submission
// fails, the endpoint, the counter `i` and the libusb error name are printed.
// NOTE(review): the declaration/update of `i`, the statement following the
// error message, and whatever encloses the trailing libusb_release_interface()
// / libusb_exit() lines are not visible in this listing -- confirm against
// the complete file whether that cleanup is actually compiled in.
1430 void BMUSBCapture::start_bm_capture()
1433 for (libusb_transfer *xfr : iso_xfrs) {
1434 int rc = libusb_submit_transfer(xfr);
1437 //printf("num_bytes=%d\n", num_bytes);
1438 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1439 xfr->endpoint, i, libusb_error_name(rc));
1446 libusb_release_interface(devh, 0);
1450 libusb_exit(nullptr);
1455 void BMUSBCapture::stop_dequeue_thread()
1457 dequeue_thread_should_quit = true;
1458 queues_not_empty.notify_all();
1459 dequeue_thread.join();
1462 void BMUSBCapture::start_bm_thread()
1464 // Devices leaving are discovered by seeing the isochronous packets
1465 // coming back with errors, so only care about devices joining.
1466 if (card_connected_callback != nullptr) {
1467 if (libusb_hotplug_register_callback(
1468 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1469 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1470 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1471 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
1476 should_quit = false;
1477 usb_thread = thread(&BMUSBCapture::usb_thread_func);
1480 void BMUSBCapture::stop_bm_thread()
1483 libusb_interrupt_event_handler(nullptr);
1487 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1489 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1490 VideoMode auto_mode;
1491 auto_mode.name = "Autodetect";
1492 auto_mode.autodetect = true;
1493 return {{ 0, auto_mode }};
1496 uint32_t BMUSBCapture::get_current_video_mode() const
1498 return 0; // Matches get_available_video_modes().
1501 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1503 assert(video_mode_id == 0); // Matches get_available_video_modes().
1506 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1509 { 0x00000000, "HDMI/SDI" },
1510 { 0x02000000, "Component" },
1511 { 0x04000000, "Composite" },
1512 { 0x06000000, "S-video" }
1516 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1518 assert((video_input_id & ~0x06000000) == 0);
1519 current_video_input = video_input_id;
1520 update_capture_mode();
1523 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1526 { 0x00000000, "Embedded" },
1527 { 0x10000000, "Analog" }
1531 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1533 assert((audio_input_id & ~0x10000000) == 0);
1534 current_audio_input = audio_input_id;
1535 update_capture_mode();
1538 void BMUSBCapture::update_capture_mode()
1540 if (devh == nullptr) {
1544 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
1545 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1546 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1547 mode |= htonl(0x20000000);
1549 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1552 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1553 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1555 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1560 } // namespace bmusb