1 // Intensity Shuttle USB3 capture driver, v0.5.4
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
49 #define AUDIO_HEADER_SIZE 4
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
64 atomic<bool> should_quit;
// Returns the isochronous packet size (in bytes) to use for video of the
// given width.
//
// Video seems to require isochronous packets scaled with the width;
// seemingly six lines is about right, rounded up to the required 1 kB
// multiple.
int find_xfer_size_for_width(int width)
{
	// Six lines at two bytes per pixel.
	// Note that for 10-bit input, you'll need to increase size accordingly.
	//size = size * 4 / 3;
	const int size = width * 2 * 6;

	// Round up to the next multiple of 1024 (a no-op if already aligned).
	return (size + 1023) & ~1023;
}
81 void change_xfer_size_for_width(int width, libusb_transfer *xfr)
83 assert(width >= MIN_WIDTH);
84 size_t size = find_xfer_size_for_width(width);
85 int num_iso_pack = xfr->length / size;
86 if (num_iso_pack != xfr->num_iso_packets ||
87 size != xfr->iso_packet_desc[0].length) {
88 xfr->num_iso_packets = num_iso_pack;
89 libusb_set_iso_packet_lengths(xfr, size);
// One row of the table of known video formats (see decode_video_format()).
// Field order matters: the table is written with positional aggregate
// initializers.
struct VideoFormatEntry {
	// Format ID with the flag bits masked away (see decode_video_format()).
	uint16_t normalized_video_format;
	unsigned width, height, second_field_start;
	unsigned extra_lines_top, extra_lines_bottom;
	unsigned frame_rate_nom, frame_rate_den;
	bool interlaced;
};
101 // Get details for the given video format; returns false if detection was incomplete.
102 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
104 decoded_video_format->id = video_format;
105 decoded_video_format->interlaced = false;
107 // TODO: Add these for all formats as we find them.
108 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
110 if (video_format == 0x0800) {
111 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
112 // It's a strange thing, but what can you do.
113 decoded_video_format->width = 720;
114 decoded_video_format->height = 525;
115 decoded_video_format->stride = 720 * 2;
116 decoded_video_format->extra_lines_top = 0;
117 decoded_video_format->extra_lines_bottom = 0;
118 decoded_video_format->frame_rate_nom = 3013;
119 decoded_video_format->frame_rate_den = 100;
120 decoded_video_format->has_signal = false;
123 if ((video_format & 0xe800) != 0xe800) {
124 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
126 decoded_video_format->width = 0;
127 decoded_video_format->height = 0;
128 decoded_video_format->stride = 0;
129 decoded_video_format->extra_lines_top = 0;
130 decoded_video_format->extra_lines_bottom = 0;
131 decoded_video_format->frame_rate_nom = 60;
132 decoded_video_format->frame_rate_den = 1;
133 decoded_video_format->has_signal = false;
137 decoded_video_format->has_signal = true;
139 // NTSC (480i59.94, I suppose). A special case, see below.
140 if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
141 decoded_video_format->width = 720;
142 decoded_video_format->height = 480;
143 decoded_video_format->stride = 720 * 2;
144 decoded_video_format->extra_lines_top = 17;
145 decoded_video_format->extra_lines_bottom = 28;
146 decoded_video_format->frame_rate_nom = 30000;
147 decoded_video_format->frame_rate_den = 1001;
148 decoded_video_format->second_field_start = 280;
149 decoded_video_format->interlaced = true;
153 // PAL (576i50, I suppose). A special case, see below.
154 if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
155 decoded_video_format->width = 720;
156 decoded_video_format->height = 576;
157 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->extra_lines_top = 22;
159 decoded_video_format->extra_lines_bottom = 27;
160 decoded_video_format->frame_rate_nom = 25;
161 decoded_video_format->frame_rate_den = 1;
162 decoded_video_format->second_field_start = 335;
163 decoded_video_format->interlaced = true;
167 // 0x8 seems to be a flag about availability of deep color on the input,
168 // except when it's not (e.g. it's the only difference between NTSC
169 // and PAL). Rather confusing. But we clear it here nevertheless, because
170 // usually it doesn't mean anything.
172 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
173 uint16_t normalized_video_format = video_format & ~0xe80c;
174 constexpr VideoFormatEntry entries[] = {
175 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
176 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
177 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
178 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
179 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
180 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
181 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
182 { 0x01c3, 1920, 1080, 0, 20, 25, 30, 1, false }, // 1080p30.
183 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
184 { 0x01e1, 1920, 1080, 0, 20, 25, 30000, 1001, false }, // 1080p29.97.
185 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
186 { 0x0063, 1920, 1080, 0, 20, 25, 25, 1, false }, // 1080p25.
187 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
188 { 0x0083, 1920, 1080, 0, 20, 25, 24, 1, false }, // 1080p24.
189 { 0x00a1, 1920, 1080, 0, 20, 25, 24000, 1001, false }, // 1080p23.98.
191 for (const VideoFormatEntry &entry : entries) {
192 if (normalized_video_format == entry.normalized_video_format) {
193 decoded_video_format->width = entry.width;
194 decoded_video_format->height = entry.height;
195 decoded_video_format->stride = entry.width * 2;
196 decoded_video_format->second_field_start = entry.second_field_start;
197 decoded_video_format->extra_lines_top = entry.extra_lines_top;
198 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
199 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
200 decoded_video_format->frame_rate_den = entry.frame_rate_den;
201 decoded_video_format->interlaced = entry.interlaced;
206 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
207 decoded_video_format->width = 1280;
208 decoded_video_format->height = 720;
209 decoded_video_format->stride = 1280 * 2;
210 decoded_video_format->frame_rate_nom = 60;
211 decoded_video_format->frame_rate_den = 1;
217 FrameAllocator::~FrameAllocator() {}
219 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
220 : frame_size(frame_size)
222 for (size_t i = 0; i < num_queued_frames; ++i) {
223 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
227 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
232 unique_lock<mutex> lock(freelist_mutex); // Meh.
233 if (freelist.empty()) {
234 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
237 vf.data = freelist.top().release();
238 vf.size = frame_size;
239 freelist.pop(); // Meh.
244 void MallocFrameAllocator::release_frame(Frame frame)
246 if (frame.overflow > 0) {
247 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
249 unique_lock<mutex> lock(freelist_mutex);
250 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if a comes strictly before b on the 16-bit circle, i.e. if
// going forward from a reaches b in fewer than 0x8000 steps. Equal values
// are not "less than" each other.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Forward distance from a to b, modulo 2^16.
	const uint16_t distance = static_cast<uint16_t>(b - a);
	return distance != 0 && distance < 0x8000;
}
265 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
267 unique_lock<mutex> lock(queue_lock);
268 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
269 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
270 q->back().timecode, timecode);
271 frame.owner->release_frame(frame);
277 qf.timecode = timecode;
279 q->push_back(move(qf));
280 queues_not_empty.notify_one(); // might be spurious
283 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
285 FILE *fp = fopen(filename, "wb");
286 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
287 printf("short write!\n");
292 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
294 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
297 void BMUSBCapture::dequeue_thread_func()
299 char thread_name[16];
300 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
301 pthread_setname_np(pthread_self(), thread_name);
303 if (has_dequeue_callbacks) {
304 dequeue_init_callback();
306 while (!dequeue_thread_should_quit) {
307 unique_lock<mutex> lock(queue_lock);
308 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
310 if (dequeue_thread_should_quit) break;
312 uint16_t video_timecode = pending_video_frames.front().timecode;
313 uint16_t audio_timecode = pending_audio_frames.front().timecode;
314 AudioFormat audio_format;
315 audio_format.bits_per_sample = 24;
316 audio_format.num_channels = 8;
317 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
318 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
320 QueuedFrame video_frame = pending_video_frames.front();
321 pending_video_frames.pop_front();
323 video_frame_allocator->release_frame(video_frame.frame);
324 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
325 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
327 QueuedFrame audio_frame = pending_audio_frames.front();
328 pending_audio_frames.pop_front();
330 audio_format.id = audio_frame.format;
332 // Use the video format of the pending frame.
333 QueuedFrame video_frame = pending_video_frames.front();
334 VideoFormat video_format;
335 decode_video_format(video_frame.format, &video_format);
337 frame_callback(audio_timecode,
338 FrameAllocator::Frame(), 0, video_format,
339 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
341 QueuedFrame video_frame = pending_video_frames.front();
342 QueuedFrame audio_frame = pending_audio_frames.front();
343 pending_audio_frames.pop_front();
344 pending_video_frames.pop_front();
349 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
350 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
351 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
354 VideoFormat video_format;
355 audio_format.id = audio_frame.format;
356 if (decode_video_format(video_frame.format, &video_format)) {
357 frame_callback(video_timecode,
358 video_frame.frame, HEADER_SIZE, video_format,
359 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
361 frame_callback(video_timecode,
362 FrameAllocator::Frame(), 0, video_format,
363 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
367 if (has_dequeue_callbacks) {
368 dequeue_cleanup_callback();
372 void BMUSBCapture::start_new_frame(const uint8_t *start)
374 uint16_t format = (start[3] << 8) | start[2];
375 uint16_t timecode = (start[1] << 8) | start[0];
377 if (current_video_frame.len > 0) {
378 current_video_frame.received_timestamp = steady_clock::now();
380 // If format is 0x0800 (no signal), add a fake (empty) audio
381 // frame to get it out of the queue.
382 // TODO: Figure out if there are other formats that come with
383 // no audio, and treat them the same.
384 if (format == 0x0800) {
385 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
386 if (fake_audio_frame.data == nullptr) {
387 // Oh well, it's just a no-signal frame anyway.
388 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
389 current_video_frame.owner->release_frame(current_video_frame);
390 current_video_frame = video_frame_allocator->alloc_frame();
393 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
396 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
398 // Update the assumed frame width. We might be one frame too late on format changes,
399 // but it's much better than asking the user to choose manually.
400 VideoFormat video_format;
401 if (decode_video_format(format, &video_format)) {
402 assumed_frame_width = video_format.width;
405 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
407 // //start[7], start[6], start[5], start[4],
408 // read_current_frame, FRAME_SIZE);
410 current_video_frame = video_frame_allocator->alloc_frame();
411 //if (current_video_frame.data == nullptr) {
412 // read_current_frame = -1;
414 // read_current_frame = 0;
418 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
420 uint16_t format = (start[3] << 8) | start[2];
421 uint16_t timecode = (start[1] << 8) | start[0];
422 if (current_audio_frame.len > 0) {
423 current_audio_frame.received_timestamp = steady_clock::now();
424 //dump_audio_block();
425 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
427 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
428 // format, timecode, read_current_audio_block);
429 current_audio_frame = audio_frame_allocator->alloc_frame();
433 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
435 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
436 for (unsigned j = 0; j < pack->actual_length; j++) {
437 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
438 printf("%02x", xfr->buffer[j + offset]);
441 else if ((j % 8) == 7)
// De-interleaves n bytes from src: bytes at even offsets go to dest1, bytes
// at odd offsets go to dest2 (so each destination receives n/2 bytes).
// n must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);
	uint8_t *dptr1 = dest1;
	uint8_t *dptr2 = dest2;

	for (size_t i = 0; i < n; i += 2) {
		*dptr1++ = *src++;
		*dptr2++ = *src++;
	}
}
461 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
463 if (current_frame->data == nullptr ||
464 current_frame->len > current_frame->size ||
469 int bytes = end - start;
470 if (current_frame->len + bytes > current_frame->size) {
471 current_frame->overflow = current_frame->len + bytes - current_frame->size;
472 current_frame->len = current_frame->size;
473 if (current_frame->overflow > 1048576) {
474 printf("%d bytes overflow after last %s frame\n",
475 int(current_frame->overflow), frame_type_name);
476 current_frame->overflow = 0;
480 if (current_frame->interleaved) {
481 uint8_t *data = current_frame->data + current_frame->len / 2;
482 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
483 if (current_frame->len % 2 == 1) {
487 if (bytes % 2 == 1) {
490 ++current_frame->len;
493 memcpy_interleaved(data, data2, start, bytes);
494 current_frame->len += bytes;
496 memcpy(current_frame->data + current_frame->len, start, bytes);
497 current_frame->len += bytes;
503 void avx2_dump(const char *name, __m256i n)
505 printf("%-10s:", name);
506 printf(" %02x", _mm256_extract_epi8(n, 0));
507 printf(" %02x", _mm256_extract_epi8(n, 1));
508 printf(" %02x", _mm256_extract_epi8(n, 2));
509 printf(" %02x", _mm256_extract_epi8(n, 3));
510 printf(" %02x", _mm256_extract_epi8(n, 4));
511 printf(" %02x", _mm256_extract_epi8(n, 5));
512 printf(" %02x", _mm256_extract_epi8(n, 6));
513 printf(" %02x", _mm256_extract_epi8(n, 7));
515 printf(" %02x", _mm256_extract_epi8(n, 8));
516 printf(" %02x", _mm256_extract_epi8(n, 9));
517 printf(" %02x", _mm256_extract_epi8(n, 10));
518 printf(" %02x", _mm256_extract_epi8(n, 11));
519 printf(" %02x", _mm256_extract_epi8(n, 12));
520 printf(" %02x", _mm256_extract_epi8(n, 13));
521 printf(" %02x", _mm256_extract_epi8(n, 14));
522 printf(" %02x", _mm256_extract_epi8(n, 15));
524 printf(" %02x", _mm256_extract_epi8(n, 16));
525 printf(" %02x", _mm256_extract_epi8(n, 17));
526 printf(" %02x", _mm256_extract_epi8(n, 18));
527 printf(" %02x", _mm256_extract_epi8(n, 19));
528 printf(" %02x", _mm256_extract_epi8(n, 20));
529 printf(" %02x", _mm256_extract_epi8(n, 21));
530 printf(" %02x", _mm256_extract_epi8(n, 22));
531 printf(" %02x", _mm256_extract_epi8(n, 23));
533 printf(" %02x", _mm256_extract_epi8(n, 24));
534 printf(" %02x", _mm256_extract_epi8(n, 25));
535 printf(" %02x", _mm256_extract_epi8(n, 26));
536 printf(" %02x", _mm256_extract_epi8(n, 27));
537 printf(" %02x", _mm256_extract_epi8(n, 28));
538 printf(" %02x", _mm256_extract_epi8(n, 29));
539 printf(" %02x", _mm256_extract_epi8(n, 30));
540 printf(" %02x", _mm256_extract_epi8(n, 31));
545 #ifndef HAS_MULTIVERSIONING
547 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
549 // No fast path possible unless we have multiversioning.
553 #else // defined(HAS_MULTIVERSIONING)
555 __attribute__((target("sse4.1")))
556 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
558 __attribute__((target("avx2")))
559 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
561 // Does a memcpy and memchr in one to reduce processing time.
562 // Note that the benefit is somewhat limited if your L3 cache is small,
563 // as you'll (unfortunately) spend most of the time loading the data
566 // Complicated cases are left to the slow path; it basically stops copying
567 // up until the first instance of "sync_char" (usually a bit before, actually).
568 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
569 // data, and what we really need this for is the 00 00 ff ff marker in video data.
570 __attribute__((target("default")))
571 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
573 // No fast path possible unless we have SSE 4.1 or higher.
577 __attribute__((target("sse4.1", "avx2")))
578 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
580 if (current_frame->data == nullptr ||
581 current_frame->len > current_frame->size ||
585 size_t orig_bytes = limit - start;
586 if (orig_bytes < 128) {
591 // Don't read more bytes than we can write.
592 limit = min(limit, start + (current_frame->size - current_frame->len));
594 // Align end to 32 bytes.
595 limit = (const uint8_t *)(intptr_t(limit) & ~31);
597 if (start >= limit) {
601 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
602 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
603 if (aligned_start != start) {
604 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
605 if (sync_start == nullptr) {
606 add_to_frame(current_frame, "", start, aligned_start);
608 add_to_frame(current_frame, "", start, sync_start);
613 // Make the length a multiple of 64.
614 if (current_frame->interleaved) {
615 if (((limit - aligned_start) % 64) != 0) {
618 assert(((limit - aligned_start) % 64) == 0);
621 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
624 __attribute__((target("avx2")))
625 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
627 const __m256i needle = _mm256_set1_epi8(sync_char);
629 const __restrict __m256i *in = (const __m256i *)aligned_start;
630 if (current_frame->interleaved) {
631 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
632 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
633 if (current_frame->len % 2 == 1) {
637 __m256i shuffle_cw = _mm256_set_epi8(
638 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
639 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
640 while (in < (const __m256i *)limit) {
641 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
642 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
643 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
645 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
646 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
647 __m256i found = _mm256_or_si256(found1, found2);
649 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
650 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
652 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
653 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
655 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
656 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
658 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
659 _mm256_storeu_si256(out2, hi);
661 if (!_mm256_testz_si256(found, found)) {
669 current_frame->len += (uint8_t *)in - aligned_start;
671 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
672 while (in < (const __m256i *)limit) {
673 __m256i data = _mm256_load_si256(in);
674 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
675 __m256i found = _mm256_cmpeq_epi8(data, needle);
676 if (!_mm256_testz_si256(found, found)) {
683 current_frame->len = (uint8_t *)out - current_frame->data;
686 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
687 return (const uint8_t *)in;
690 __attribute__((target("sse4.1")))
691 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
693 const __m128i needle = _mm_set1_epi8(sync_char);
695 const __m128i *in = (const __m128i *)aligned_start;
696 if (current_frame->interleaved) {
697 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
698 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
699 if (current_frame->len % 2 == 1) {
703 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
704 while (in < (const __m128i *)limit) {
705 __m128i data1 = _mm_load_si128(in);
706 __m128i data2 = _mm_load_si128(in + 1);
707 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
708 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
709 __m128i data1_hi = _mm_srli_epi16(data1, 8);
710 __m128i data2_hi = _mm_srli_epi16(data2, 8);
711 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
712 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
713 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
714 _mm_storeu_si128(out2, hi);
715 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
716 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
717 if (!_mm_testz_si128(found1, found1) ||
718 !_mm_testz_si128(found2, found2)) {
726 current_frame->len += (uint8_t *)in - aligned_start;
728 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
729 while (in < (const __m128i *)limit) {
730 __m128i data = _mm_load_si128(in);
731 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
732 __m128i found = _mm_cmpeq_epi8(data, needle);
733 if (!_mm_testz_si128(found, found)) {
740 current_frame->len = (uint8_t *)out - current_frame->data;
743 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
744 return (const uint8_t *)in;
747 #endif // defined(HAS_MULTIVERSIONING)
749 void decode_packs(const libusb_transfer *xfr,
750 const char *sync_pattern,
752 FrameAllocator::Frame *current_frame,
753 const char *frame_type_name,
754 function<void(const uint8_t *start)> start_callback)
757 for (int i = 0; i < xfr->num_iso_packets; i++) {
758 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
760 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
761 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
766 const uint8_t *start = xfr->buffer + offset;
767 const uint8_t *limit = start + pack->actual_length;
768 while (start < limit) { // Usually runs only one iteration.
769 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
770 if (start == limit) break;
771 assert(start < limit);
773 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
774 if (start_next_frame == nullptr) {
775 // add the rest of the buffer
776 add_to_frame(current_frame, frame_type_name, start, limit);
779 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
780 start = start_next_frame + sync_length; // skip sync
781 start_callback(start);
785 dump_pack(xfr, offset, pack);
787 offset += pack->length;
791 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
793 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
794 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
795 fprintf(stderr, "error: transfer status %d\n", xfr->status);
796 libusb_free_transfer(xfr);
800 assert(xfr->user_data != nullptr);
801 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
803 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
804 if (!usb->disconnected) {
805 fprintf(stderr, "Device went away, stopping transfers.\n");
806 usb->disconnected = true;
807 if (usb->card_disconnected_callback) {
808 usb->card_disconnected_callback();
811 // Don't reschedule the transfer; the loop will stop by itself.
815 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
816 if (xfr->endpoint == 0x84) {
817 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
819 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
821 // Update the transfer with the new assumed width, if we're in the process of changing formats.
822 change_xfer_size_for_width(usb->assumed_frame_width, xfr);
825 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
826 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
827 uint8_t *buf = libusb_control_transfer_get_data(xfr);
829 if (setup->wIndex == 44) {
830 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
832 printf("read register %2d: 0x%02x%02x%02x%02x\n",
833 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
836 memcpy(usb->register_file + usb->current_register, buf, 4);
837 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
838 if (usb->current_register == 0) {
839 // read through all of them
840 printf("register dump:");
841 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
842 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
846 libusb_fill_control_setup(xfr->buffer,
847 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
848 /*index=*/usb->current_register, /*length=*/4);
853 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
854 for (i = 0; i < xfr->actual_length; i++) {
855 printf("%02x", xfr->buffer[i]);
865 int rc = libusb_submit_transfer(xfr);
867 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
872 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
874 if (card_connected_callback != nullptr) {
875 libusb_device_descriptor desc;
876 if (libusb_get_device_descriptor(dev, &desc) < 0) {
877 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
878 libusb_unref_device(dev);
882 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
883 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
884 card_connected_callback(dev); // Callback takes ownership.
888 libusb_unref_device(dev);
892 void BMUSBCapture::usb_thread_func()
895 memset(¶m, 0, sizeof(param));
896 param.sched_priority = 1;
897 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
898 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
900 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
901 while (!should_quit) {
902 timeval sec { 1, 0 };
903 int rc = libusb_handle_events_timeout(nullptr, &sec);
904 if (rc != LIBUSB_SUCCESS)
911 struct USBCardDevice {
914 libusb_device *device;
// Returns a human-readable name for the given USB product ID. Only the
// product IDs we support are expected here; anything else trips an assert.
const char *get_product_name(uint16_t product)
{
	if (product == 0xbd3b) {
		return "Intensity Shuttle";
	} else if (product == 0xbd4f) {
		return "UltraStudio SDI";
	} else {
		assert(false);
		// With asserts compiled out, still return something printable;
		// callers pass the result straight to printf("%s").
		return "(unknown product)";
	}
}
929 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
931 const char *product_name = get_product_name(product);
934 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
935 id, bus, port, product_name);
939 vector<USBCardDevice> find_all_cards()
941 libusb_device **devices;
942 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
943 if (num_devices == -1) {
944 fprintf(stderr, "Error finding USB devices\n");
947 vector<USBCardDevice> found_cards;
948 for (ssize_t i = 0; i < num_devices; ++i) {
949 libusb_device_descriptor desc;
950 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
951 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
955 uint8_t bus = libusb_get_bus_number(devices[i]);
956 uint8_t port = libusb_get_port_number(devices[i]);
958 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
959 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
960 libusb_unref_device(devices[i]);
964 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
966 libusb_free_device_list(devices, 0);
968 // Sort the devices to get a consistent ordering.
969 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
970 if (a.product != b.product)
971 return a.product < b.product;
973 return a.bus < b.bus;
974 return a.port < b.port;
980 libusb_device_handle *open_card(int card_index, string *description)
982 vector<USBCardDevice> found_cards = find_all_cards();
984 for (size_t i = 0; i < found_cards.size(); ++i) {
985 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
986 fprintf(stderr, "%s\n", tmp_description.c_str());
987 if (i == size_t(card_index)) {
988 *description = tmp_description;
992 if (size_t(card_index) >= found_cards.size()) {
993 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
997 libusb_device_handle *devh;
998 int rc = libusb_open(found_cards[card_index].device, &devh);
1000 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
1004 for (size_t i = 0; i < found_cards.size(); ++i) {
1005 libusb_unref_device(found_cards[i].device);
1011 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1013 uint8_t bus = libusb_get_bus_number(dev);
1014 uint8_t port = libusb_get_port_number(dev);
1016 libusb_device_descriptor desc;
1017 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1018 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1022 *description = get_card_description(card_index, bus, port, desc.idProduct);
1024 libusb_device_handle *devh;
1025 int rc = libusb_open(dev, &devh);
1027 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
1036 unsigned BMUSBCapture::num_cards()
1038 int rc = libusb_init(nullptr);
1040 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1044 vector<USBCardDevice> found_cards = find_all_cards();
1045 unsigned ret = found_cards.size();
1046 for (size_t i = 0; i < found_cards.size(); ++i) {
1047 libusb_unref_device(found_cards[i].device);
// Prepares the card for capture. In order, the visible code:
//  - installs internally owned MallocFrameAllocators for video/audio if the
//    caller did not supply any, and starts the dequeue thread,
//  - initializes libusb and opens the device (by index, or from `dev`),
//  - selects configuration 1, claims interface 0 and switches alternate settings,
//  - pushes the capture mode via update_capture_mode(),
//  - issues a small table of vendor control requests (214 = read, 215 = write),
//  - allocates isochronous transfers for endpoints 3 and 4 and stores them
//    in iso_xfrs (they are not submitted here).
//
// NOTE(review): this listing appears to have lost lines (e.g. `rc` is assigned
// and the error message follows with no visible `if (rc < 0)` guard, closing
// braces and preprocessor #else/#endif lines are absent). Restore them from
// the upstream file before relying on the exact control flow shown here.
1052 void BMUSBCapture::configure_card()
// Fall back to internally owned, malloc-based allocators when none were set.
1054 if (video_frame_allocator == nullptr) {
1055 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1056 set_video_frame_allocator(owned_video_frame_allocator.get());
1058 if (audio_frame_allocator == nullptr) {
1059 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1060 set_audio_frame_allocator(owned_audio_frame_allocator.get());
// The dequeue thread is started before any USB transfer has been allocated.
1062 dequeue_thread_should_quit = false;
1063 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1066 struct libusb_transfer *xfr;
1068 rc = libusb_init(nullptr);
1070 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Open by index when no explicit device was given; otherwise open the given
// device and drop our reference to it afterwards.
1074 if (dev == nullptr) {
1075 devh = open_card(card_index, &description);
1077 devh = open_card(card_index, dev, &description);
1078 libusb_unref_device(dev);
1081 fprintf(stderr, "Error finding USB device\n");
1085 libusb_config_descriptor *config;
1086 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1088 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Dump of interfaces / alternate settings / endpoint addresses of the configuration.
1093 printf("%d interface\n", config->bNumInterfaces);
1094 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1095 printf(" interface %d\n", interface_number);
1096 const libusb_interface *interface = &config->interface[interface_number];
1097 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1098 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1099 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1100 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1101 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1102 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1108 rc = libusb_set_configuration(devh, /*configuration=*/1);
1110 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1114 rc = libusb_claim_interface(devh, 0);
1116 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1120 // Alternate setting 1 is output, alternate setting 2 is input.
1121 // Card is reset when switching alternates, so the driver uses
1122 // this “double switch” when it wants to reset.
1124 // There's also alternate settings 3 and 4, which seem to be
1125 // like 1 and 2 except they advertise less bandwidth needed.
1126 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1128 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1129 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1130 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1131 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1132 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1136 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1138 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
1142 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1144 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1150 rc = libusb_claim_interface(devh, 3);
1152 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers (indices used with
// vendor requests 214/215) follow.
1158 // 44 is some kind of timer register (first 16 bits count upwards)
1159 // 24 is some sort of watchdog?
1160 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1161 // (or will go to 0x73c60010?), also seen 0x73c60100
1162 // 12 also changes all the time, unclear why
1163 // 16 seems to be autodetected mode somehow
1164 // -- this is e00115e0 after reset?
1165 // ed0115e0 after mode change [to output?]
1166 // 2d0015e0 after more mode change [to input]
1167 // ed0115e0 after more mode change
1168 // 2d0015e0 after more mode change
1170 // 390115e0 seems to indicate we have signal
1171 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1173 // 200015e0 on startup
1174 // changes to 250115e0 when we sync to the signal
1176 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1178 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1180 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1181 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1183 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1184 // perhaps some of them are related to analog output?
1186 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1187 // but the driver sets it to 0x8036802a at some point.
1189 // all of this is on request 214/215. other requests (192, 219,
1190 // 222, 223, 224) are used for firmware upgrade. Probably best to
1191 // stay out of it unless you know what you're doing.
1195 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1198 // 0x01 - stable signal
1199 // 0x04 - deep color
1200 // 0x08 - unknown (audio??)
// Write the currently selected video/audio input to the card.
1204 update_capture_mode();
// Table of vendor control requests issued once at startup; fields are read
// below as .endpoint, .request, .index and .data.
1212 static const ctrl ctrls[] = {
1213 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1214 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1216 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1217 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1218 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1219 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
// Each request carries a 4-byte payload holding ctrls[req].data converted with htonl().
1222 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
1223 uint32_t flipped = htonl(ctrls[req].data);
1224 static uint8_t value[4];
1225 memcpy(value, &flipped, sizeof(flipped));
1226 int size = sizeof(value);
1227 //if (ctrls[req].request == 215) size = 0;
1228 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1229 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1231 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full 4-byte read of register 16 yields the firmware version in its last two bytes.
1235 if (ctrls[req].index == 16 && rc == 4) {
1236 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1240 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1241 for (int i = 0; i < rc; ++i) {
1242 printf("%02x", value[i]);
// NOTE(review): the register-dump and asynchronous control-transfer code below
// looks like debugging scaffolding; whether it is compiled in cannot be
// determined from the visible lines — confirm against the full file.
1251 static int my_index = 0;
1252 static uint8_t value[4];
1253 int size = sizeof(value);
1254 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1255 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1257 fprintf(stderr, "Error on control\n");
1260 printf("rc=%d index=%d: 0x", rc, my_index);
1261 for (int i = 0; i < rc; ++i) {
1262 printf("%02x", value[i]);
1269 // set up an asynchronous transfer of the timer register
1270 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1271 static int completed = 0;
1273 xfr = libusb_alloc_transfer(0);
1274 libusb_fill_control_setup(cmdbuf,
1275 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1276 /*index=*/44, /*length=*/4);
1277 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1278 xfr->user_data = this;
1279 libusb_submit_transfer(xfr);
1281 // set up an asynchronous transfer of register 24
1282 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1283 static int completed2 = 0;
1285 xfr = libusb_alloc_transfer(0);
1286 libusb_fill_control_setup(cmdbuf2,
1287 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1288 /*index=*/24, /*length=*/4);
1289 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1290 xfr->user_data = this;
1291 libusb_submit_transfer(xfr);
1294 // set up an asynchronous transfer of the register dump
1295 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1296 static int completed3 = 0;
1298 xfr = libusb_alloc_transfer(0);
1299 libusb_fill_control_setup(cmdbuf3,
1300 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1301 /*index=*/current_register, /*length=*/4);
1302 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1303 xfr->user_data = this;
1304 //libusb_submit_transfer(xfr);
1306 //audiofp = fopen("audio.raw", "wb");
1308 // set up isochronous transfers for audio and video
// Endpoints 3 and 4, six transfers each. One branch sizes the transfer from
// the frame width (video); which endpoint number that is, is not visible here.
1309 for (int e = 3; e <= 4; ++e) {
1310 //int num_transfers = (e == 3) ? 6 : 6;
1311 int num_transfers = 6;
1312 for (int i = 0; i < num_transfers; ++i) {
1314 int num_iso_pack, size;
1316 // Allocate for minimum width (because that will give us the most
1317 // number of packets, so we don't need to reallocate), but we'll
1318 // default to 720p for the first frame.
1319 size = find_xfer_size_for_width(MIN_WIDTH);
1320 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1321 buf_size = USB_VIDEO_TRANSFER_SIZE;
1325 buf_size = num_iso_pack * size;
1327 int num_bytes = num_iso_pack * size;
1328 assert(size_t(num_bytes) <= buf_size);
// Prefer device (DMA) memory from libusb when the API version provides it.
1329 #if LIBUSB_API_VERSION >= 0x01000105
1330 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1332 uint8_t *buf = nullptr;
// Fallback: plain heap buffer when device memory is unavailable or allocation failed.
1334 if (buf == nullptr) {
1335 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1336 #if LIBUSB_API_VERSION >= 0x01000105
1337 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1339 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1341 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
1342 buf = new uint8_t[num_bytes];
1345 xfr = libusb_alloc_transfer(num_iso_pack);
1347 fprintf(stderr, "oom\n");
1351 int ep = LIBUSB_ENDPOINT_IN | e;
1352 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1353 num_iso_pack, cb_xfr, nullptr, 0);
1354 libusb_set_iso_packet_lengths(xfr, size);
1355 xfr->user_data = this;
// Re-split the transfer into packets sized for the assumed frame width.
1358 change_xfer_size_for_width(assumed_frame_width, xfr);
// Transfers are only collected here; start_bm_capture() submits them.
1361 iso_xfrs.push_back(xfr);
// Submits every transfer collected in iso_xfrs via libusb_submit_transfer()
// and reports a failed submission on stderr.
//
// NOTE(review): `i` is used in the error message but is not declared in the
// visible lines, and the trailing libusb_release_interface()/libusb_exit()
// calls are presumably teardown code that is compiled out — whether they are
// live cannot be determined from this listing; confirm against the full file.
1366 void BMUSBCapture::start_bm_capture()
1369 for (libusb_transfer *xfr : iso_xfrs) {
1370 int rc = libusb_submit_transfer(xfr);
1373 //printf("num_bytes=%d\n", num_bytes);
1374 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1375 xfr->endpoint, i, libusb_error_name(rc));
1382 libusb_release_interface(devh, 0);
1386 libusb_exit(nullptr);
1391 void BMUSBCapture::stop_dequeue_thread()
1393 dequeue_thread_should_quit = true;
1394 queues_not_empty.notify_all();
1395 dequeue_thread.join();
1398 void BMUSBCapture::start_bm_thread()
1400 // Devices leaving are discovered by seeing the isochronous packets
1401 // coming back with errors, so only care about devices joining.
1402 if (card_connected_callback != nullptr) {
1403 if (libusb_hotplug_register_callback(
1404 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1405 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1406 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1407 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
1412 should_quit = false;
1413 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread().
// NOTE(review): the body is not visible in this listing; presumably it sets
// should_quit and joins usb_thread — confirm against the full file.
1416 void BMUSBCapture::stop_bm_thread()
1422 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1424 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1425 VideoMode auto_mode;
1426 auto_mode.name = "Autodetect";
1427 auto_mode.autodetect = true;
1428 return {{ 0, auto_mode }};
1431 uint32_t BMUSBCapture::get_current_video_mode() const
1433 return 0; // Matches get_available_video_modes().
1436 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1438 assert(video_mode_id == 0); // Matches get_available_video_modes().
1441 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1444 { 0x00000000, "HDMI/SDI" },
1445 { 0x02000000, "Component" },
1446 { 0x04000000, "Composite" },
1447 { 0x06000000, "S-video" }
1451 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1453 assert((video_input_id & ~0x06000000) == 0);
1454 current_video_input = video_input_id;
1455 update_capture_mode();
1458 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1461 { 0x00000000, "Embedded" },
1462 { 0x10000000, "Analog" }
1466 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1468 assert((audio_input_id & ~0x10000000) == 0);
1469 current_audio_input = audio_input_id;
1470 update_capture_mode();
1473 void BMUSBCapture::update_capture_mode()
1475 // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
1476 // clearing the 0x08000000 bit seems to change the capture format (other source?)
1477 uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
1479 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1480 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1482 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1487 } // namespace bmusb