1 // Intensity Shuttle USB3 capture driver, v0.5.4
2 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
7 #if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
8 #define HAS_MULTIVERSIONING 1
15 #include <netinet/in.h>
22 #if HAS_MULTIVERSIONING
23 #include <immintrin.h>
25 #include "bmusb/bmusb.h"
30 #include <condition_variable>
42 using namespace std::chrono;
43 using namespace std::placeholders;
// USB vendor ID that, together with a product ID, identifies the supported cards.
45 #define USB_VENDOR_BLACKMAGIC 0x1edb
// Number of bytes skipped at the start of each video frame before its payload.
47 #define HEADER_SIZE 44
48 //#define HEADER_SIZE 0
// Number of bytes skipped at the start of each audio block before its payload.
49 #define AUDIO_HEADER_SIZE 4
// Buffer size used when the driver creates its own video frame allocator.
51 #define FRAME_SIZE (8 << 20) // 8 MB.
52 #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
// Definitions of BMUSBCapture's static hotplug members.
56 card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
57 bool BMUSBCapture::hotplug_existing_devices = false;
// Polled by the USB event thread's loop; the loop ends once this reads true.
64 atomic<bool> should_quit;
// Returns the number of bytes occupied by one line of v210-packed (10-bit)
// video of the given width. Pixels are grouped six at a time, rounding up,
// and every group takes four 32-bit words.
int v210_stride(int width)
{
	const int groups = (width + 5) / 6;  // Round up to whole six-pixel groups.
	// sizeof yields size_t; convert back explicitly since callers expect int.
	return static_cast<int>(groups * 4 * sizeof(uint32_t));
}
// Picks the isochronous packet size for the given pixel format and frame
// width. The size starts out as six lines' worth of bytes, where one line is
// the v210 stride for 10-bit input and two bytes per pixel otherwise.
// NOTE(review): the lines that round the size and return it are not visible
// in this excerpt; the comment below says it is rounded up to 1 kB.
71 int find_xfer_size_for_width(PixelFormat pixel_format, int width)
73 // Video seems to require isochronous packets scaled with the width;
74 // seemingly six lines is about right, rounded up to the required 1kB
76 // Note that for 10-bit input, you'll need to increase size accordingly.
78 if (pixel_format == PixelFormat_10BitYCbCr) {
79 stride = v210_stride(width);
// 8-bit path: two bytes per pixel.
81 stride = width * sizeof(uint16_t);
83 int size = stride * 6;
// Only sizes that are not already a multiple of 1024 are adjusted.
84 if (size % 1024 != 0) {
// Re-partitions an isochronous transfer so that its packet size matches the
// given pixel format and frame width. The transfer is only touched when the
// packet count or the first packet's length would actually change.
91 void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
93 assert(width >= MIN_WIDTH);
94 size_t size = find_xfer_size_for_width(pixel_format, width);
// As many whole packets of the new size as fit in the transfer's buffer.
95 int num_iso_pack = xfr->length / size;
96 if (num_iso_pack != xfr->num_iso_packets ||
97 size != xfr->iso_packet_desc[0].length) {
98 xfr->num_iso_packets = num_iso_pack;
99 libusb_set_iso_packet_lengths(xfr, size);
// One row of the format lookup table used by decode_video_format().
// NOTE(review): the table rows and decode_video_format() also use an
// `interlaced` member whose declaration is not visible in this excerpt.
103 struct VideoFormatEntry {
// Format code after masking, compared against the masked code from the card.
104 uint16_t normalized_video_format;
105 unsigned width, height, second_field_start;
106 unsigned extra_lines_top, extra_lines_bottom;
// Frame rate as a fraction (nominator / denominator).
107 unsigned frame_rate_nom, frame_rate_den;
111 // Get details for the given video format; returns false if detection was incomplete.
// Fills *decoded_video_format from the 16-bit format code reported by the
// card. Handled in order: the no-signal code 0x0800, codes that do not look
// like a video format, NTSC, PAL, a table of known progressive/interlaced
// modes, and finally a 720p60 guess for anything unrecognized.
// NOTE(review): the return statements are not visible in this excerpt.
112 bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
114 decoded_video_format->id = video_format;
115 decoded_video_format->interlaced = false;
117 // TODO: Add these for all formats as we find them.
118 decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
120 if (video_format == 0x0800) {
121 // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
122 // It's a strange thing, but what can you do.
123 decoded_video_format->width = 720;
124 decoded_video_format->height = 525;
125 decoded_video_format->stride = 720 * 2;
126 decoded_video_format->extra_lines_top = 0;
127 decoded_video_format->extra_lines_bottom = 0;
128 decoded_video_format->frame_rate_nom = 3013;
129 decoded_video_format->frame_rate_den = 100;
130 decoded_video_format->has_signal = false;
// Every recognized video format has the top three bits set.
133 if ((video_format & 0xe000) != 0xe000) {
134 printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
136 decoded_video_format->width = 0;
137 decoded_video_format->height = 0;
138 decoded_video_format->stride = 0;
139 decoded_video_format->extra_lines_top = 0;
140 decoded_video_format->extra_lines_bottom = 0;
141 decoded_video_format->frame_rate_nom = 60;
142 decoded_video_format->frame_rate_den = 1;
143 decoded_video_format->has_signal = false;
147 decoded_video_format->has_signal = true;
149 // NTSC (480i59.94, I suppose). A special case, see below.
// The 0x0800 bit is masked out for matching; it only selects the stride below.
150 if ((video_format & ~0x0800) == 0xe101 ||
151 (video_format & ~0x0800) == 0xe1c1 ||
152 (video_format & ~0x0800) == 0xe001) {
153 decoded_video_format->width = 720;
154 decoded_video_format->height = 480;
// 0x0800 set: two bytes per pixel; otherwise the v210 stride.
155 if (video_format & 0x0800) {
156 decoded_video_format->stride = 720 * 2;
158 decoded_video_format->stride = v210_stride(720);
160 decoded_video_format->extra_lines_top = 17;
161 decoded_video_format->extra_lines_bottom = 28;
162 decoded_video_format->frame_rate_nom = 30000;
163 decoded_video_format->frame_rate_den = 1001;
164 decoded_video_format->second_field_start = 280;
165 decoded_video_format->interlaced = true;
169 // PAL (576i50, I suppose). A special case, see below.
170 if ((video_format & ~0x0800) == 0xe109 ||
171 (video_format & ~0x0800) == 0xe1c9 ||
172 (video_format & ~0x0800) == 0xe009 ||
173 (video_format & ~0x0800) == 0xe3e9 ||
174 (video_format & ~0x0800) == 0xe3e1) {
175 decoded_video_format->width = 720;
176 decoded_video_format->height = 576;
177 if (video_format & 0x0800) {
178 decoded_video_format->stride = 720 * 2;
180 decoded_video_format->stride = v210_stride(720);
182 decoded_video_format->extra_lines_top = 22;
183 decoded_video_format->extra_lines_bottom = 27;
184 decoded_video_format->frame_rate_nom = 25;
185 decoded_video_format->frame_rate_den = 1;
186 decoded_video_format->second_field_start = 335;
187 decoded_video_format->interlaced = true;
191 // 0x8 seems to be a flag about availability of deep color on the input,
192 // except when it's not (e.g. it's the only difference between NTSC
193 // and PAL). Rather confusing. But we clear it here nevertheless, because
194 // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
195 // (as opposed to 10-bit).
197 // 0x4 is a flag I've only seen from the D4. I don't know what it is.
// Clears the 0xe000 marker bits together with the 0x0800, 0x8 and 0x4 flags.
198 uint16_t normalized_video_format = video_format & ~0xe80c;
// Columns: normalized code, width, height, second_field_start,
// extra_lines_top, extra_lines_bottom, frame_rate_nom, frame_rate_den, interlaced.
199 constexpr VideoFormatEntry entries[] = {
200 { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
201 { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
202 { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
203 { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
204 { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
205 { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
206 { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
207 { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
208 { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
209 { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
210 { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
211 { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
212 { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
213 { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
214 { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
// Linear search of the table; a match copies the whole row into the output.
216 for (const VideoFormatEntry &entry : entries) {
217 if (normalized_video_format == entry.normalized_video_format) {
218 decoded_video_format->width = entry.width;
219 decoded_video_format->height = entry.height;
220 if (video_format & 0x0800) {
221 decoded_video_format->stride = entry.width * 2;
223 decoded_video_format->stride = v210_stride(entry.width);
225 decoded_video_format->second_field_start = entry.second_field_start;
226 decoded_video_format->extra_lines_top = entry.extra_lines_top;
227 decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
228 decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
229 decoded_video_format->frame_rate_den = entry.frame_rate_den;
230 decoded_video_format->interlaced = entry.interlaced;
// Fallback for codes not in the table: report them and assume 720p60 with two bytes per pixel.
235 printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
236 decoded_video_format->width = 1280;
237 decoded_video_format->height = 720;
238 decoded_video_format->stride = 1280 * 2;
239 decoded_video_format->frame_rate_nom = 60;
240 decoded_video_format->frame_rate_den = 1;
// Out-of-line, empty destructor definition for the FrameAllocator interface.
246 FrameAllocator::~FrameAllocator() {}
// Creates an allocator that hands out fixed-size heap buffers. All
// num_queued_frames buffers of frame_size bytes are allocated up front and
// pushed onto the free list.
248 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
249 : frame_size(frame_size)
251 for (size_t i = 0; i < num_queued_frames; ++i) {
252 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
// Takes one buffer off the free list and returns it as a Frame. If the list
// is empty, an overrun message is printed instead of handing out a buffer.
// NOTE(review): the declaration, the remaining setup of `vf` and the return
// statement are not visible in this excerpt.
256 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
// The free list is shared between threads, so it is only touched under the mutex.
261 unique_lock<mutex> lock(freelist_mutex); // Meh.
262 if (freelist.empty()) {
// NOTE(review): %ld is presumably paired with the size_t frame size here — confirm the argument type.
263 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
// release() hands ownership of the raw buffer to the Frame; the emptied unique_ptr is then popped.
266 vf.data = freelist.top().release();
267 vf.size = frame_size;
268 freelist.pop(); // Meh.
// Returns a frame's buffer to the free list, first reporting any overflow
// that was recorded on the frame.
273 void MallocFrameAllocator::release_frame(Frame frame)
275 if (frame.overflow > 0) {
276 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
278 unique_lock<mutex> lock(freelist_mutex);
// Re-wrap the raw pointer so the free list owns the buffer again.
279 freelist.push(unique_ptr<uint8_t[]>(frame.data));
// Returns true if 16-bit counter value a comes strictly before b, allowing
// for wraparound: b must be between 1 and 0x7fff steps ahead of a, modulo
// 0x10000. Equal values are not "less than"; callers rely on that to treat
// matching timecodes as a pair and repeated timecodes as going backwards.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// The truncating cast performs the modulo-0x10000 subtraction, which
	// covers both the a < b case and the wrapped (a > b) case at once.
	const uint16_t distance = static_cast<uint16_t>(b - a);
	return distance != 0 && distance < 0x8000;
}
// Appends a finished frame to the given pending queue and wakes the dequeue
// thread. A frame whose timecode does not come after the timecode at the back
// of the queue is reported and handed back to its allocator instead.
// NOTE(review): the lines that declare `qf` and fill in its other fields are
// not visible in this excerpt.
294 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
296 unique_lock<mutex> lock(queue_lock);
297 if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
298 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
299 q->back().timecode, timecode);
300 frame.owner->release_frame(frame);
306 qf.timecode = timecode;
308 q->push_back(move(qf));
309 queues_not_empty.notify_one(); // might be spurious
312 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
314 FILE *fp = fopen(filename, "wb");
315 if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
316 printf("short write!\n");
// Debugging aid: appends one audio block, minus its AUDIO_HEADER_SIZE-byte
// header, to the already-open stream `audiofp` (defined outside this excerpt).
// NOTE(review): audio_len is not checked against AUDIO_HEADER_SIZE, so a
// shorter block would make the unsigned subtraction wrap around.
321 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
323 fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
// Body of the dequeue thread. It waits until both the video and the audio
// queue have an entry, pairs the two front entries by timecode and delivers
// the result through frame_callback:
//  - video older than audio: the video frame is dropped and released;
//  - audio older than video: the audio is delivered with an empty video frame;
//  - equal timecodes: both are delivered together.
// The optional init/cleanup callbacks run at the start and end of the thread.
326 void BMUSBCapture::dequeue_thread_func()
// The buffer holds 15 characters plus the terminator; snprintf truncates longer names.
328 char thread_name[16];
329 snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
330 pthread_setname_np(pthread_self(), thread_name);
332 if (has_dequeue_callbacks) {
333 dequeue_init_callback();
335 while (!dequeue_thread_should_quit) {
336 unique_lock<mutex> lock(queue_lock);
// Sleep until asked to quit or until both queues are non-empty.
337 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
339 if (dequeue_thread_should_quit) break;
341 uint16_t video_timecode = pending_video_frames.front().timecode;
342 uint16_t audio_timecode = pending_audio_frames.front().timecode;
// The audio layout is fixed here: 8 channels of 24-bit samples.
343 AudioFormat audio_format;
344 audio_format.bits_per_sample = 24;
345 audio_format.num_channels = 8;
346 if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
347 printf("Video block 0x%04x without corresponding audio block, dropping.\n",
349 QueuedFrame video_frame = pending_video_frames.front();
350 pending_video_frames.pop_front();
352 video_frame_allocator->release_frame(video_frame.frame);
353 } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
354 printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
356 QueuedFrame audio_frame = pending_audio_frames.front();
357 pending_audio_frames.pop_front();
359 audio_format.id = audio_frame.format;
361 // Use the video format of the pending frame.
// The video frame is only peeked at here; it stays queued for a later audio block.
362 QueuedFrame video_frame = pending_video_frames.front();
363 VideoFormat video_format;
364 decode_video_format(video_frame.format, &video_format);
366 frame_callback(audio_timecode,
367 FrameAllocator::Frame(), 0, video_format,
368 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
// Matching timecodes: take both entries off their queues.
370 QueuedFrame video_frame = pending_video_frames.front();
371 QueuedFrame audio_frame = pending_audio_frames.front();
372 pending_audio_frames.pop_front();
373 pending_video_frames.pop_front();
// Debug dump of the pair. NOTE(review): presumably compiled out by a guard that is not visible here.
378 snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
379 dump_frame(filename, video_frame.frame.data, video_frame.data_len);
380 dump_audio_block(audio_frame.frame.data, audio_frame.data_len);
383 VideoFormat video_format;
384 audio_format.id = audio_frame.format;
// The video data is only passed on when the format decoded completely; otherwise an empty frame is sent.
385 if (decode_video_format(video_frame.format, &video_format)) {
386 frame_callback(video_timecode,
387 video_frame.frame, HEADER_SIZE, video_format,
388 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
390 frame_callback(video_timecode,
391 FrameAllocator::Frame(), 0, video_format,
392 audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
396 if (has_dequeue_callbacks) {
397 dequeue_cleanup_callback();
// Called with a pointer to the bytes that follow a video sync marker. Reads
// the format and timecode found there, queues the video frame collected so
// far (if it has any data) under those values, updates the assumed frame
// width, and allocates a fresh frame to collect into.
401 void BMUSBCapture::start_new_frame(const uint8_t *start)
// Both fields are 16-bit little-endian: timecode in bytes 0-1, format in bytes 2-3.
403 uint16_t format = (start[3] << 8) | start[2];
404 uint16_t timecode = (start[1] << 8) | start[0];
406 if (current_video_frame.len > 0) {
407 current_video_frame.received_timestamp = steady_clock::now();
409 // If format is 0x0800 (no signal), add a fake (empty) audio
410 // frame to get it out of the queue.
411 // TODO: Figure out if there are other formats that come with
412 // no audio, and treat them the same.
413 if (format == 0x0800) {
414 FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
415 if (fake_audio_frame.data == nullptr) {
416 // Oh well, it's just a no-signal frame anyway.
417 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
418 current_video_frame.owner->release_frame(current_video_frame);
419 current_video_frame = video_frame_allocator->alloc_frame();
422 queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
425 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
427 // Update the assumed frame width. We might be one frame too late on format changes,
428 // but it's much better than asking the user to choose manually.
429 VideoFormat video_format;
430 if (decode_video_format(format, &video_format)) {
431 assumed_frame_width = video_format.width;
434 //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
436 // //start[7], start[6], start[5], start[4],
437 // read_current_frame, FRAME_SIZE);
439 current_video_frame = video_frame_allocator->alloc_frame();
440 //if (current_video_frame.data == nullptr) {
441 // read_current_frame = -1;
443 // read_current_frame = 0;
// Called with a pointer to the bytes that follow an audio sync marker. Queues
// the audio block collected so far (if it has any data) under the format and
// timecode read from those bytes, then allocates a fresh block.
447 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
// Same layout as for video: little-endian timecode in bytes 0-1, format in bytes 2-3.
449 uint16_t format = (start[3] << 8) | start[2];
450 uint16_t timecode = (start[1] << 8) | start[0];
451 if (current_audio_frame.len > 0) {
452 current_audio_frame.received_timestamp = steady_clock::now();
453 //dump_audio_block();
454 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
456 //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
457 // format, timecode, read_current_audio_block);
458 current_audio_frame = audio_frame_allocator->alloc_frame();
// Debugging aid: prints the received bytes of one isochronous packet as hex.
// `offset` is where the packet starts inside xfr->buffer.
// NOTE(review): the separator/newline logic around the `% 8` test is only
// partly visible in this excerpt.
462 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
464 // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
465 for (unsigned j = 0; j < pack->actual_length; j++) {
466 //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
467 printf("%02x", xfr->buffer[j + offset]);
470 else if ((j % 8) == 7)
// De-interleaves a byte stream into two planes: bytes at even offsets of src
// go to dest1 and bytes at odd offsets go to dest2, matching the split done
// by the SIMD fast paths. Callers pass an even n; if n is odd, the trailing
// byte is left untouched rather than reading one byte past the end of src.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	uint8_t *dptr1 = dest1;
	uint8_t *dptr2 = dest2;

	// Each iteration consumes a full pair, so only loop while a pair remains.
	for (size_t i = 0; i + 1 < n; i += 2) {
		*dptr1++ = *src++;
		*dptr2++ = *src++;
	}
}
// Appends the bytes in [start, end) to a frame (the slow, general path).
// Data that does not fit is counted in current_frame->overflow rather than
// written. Interleaved frames have their bytes split between `data` and
// `data2`; other frames get a plain memcpy.
// NOTE(review): several lines (the rest of the guard condition, the early
// returns and the odd-byte handling) are not visible in this excerpt.
490 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
// Guard: no buffer to write to, or a length that already exceeds the buffer.
492 if (current_frame->data == nullptr ||
493 current_frame->len > current_frame->size ||
498 int bytes = end - start;
499 if (current_frame->len + bytes > current_frame->size) {
// Record how many bytes did not fit and mark the frame as full.
500 current_frame->overflow = current_frame->len + bytes - current_frame->size;
501 current_frame->len = current_frame->size;
// Only overflows above 1 MiB are reported here; the counter is then reset.
502 if (current_frame->overflow > 1048576) {
503 printf("%d bytes overflow after last %s frame\n",
504 int(current_frame->overflow), frame_type_name);
505 current_frame->overflow = 0;
509 if (current_frame->interleaved) {
// Each plane receives every other byte, so the write position is len / 2.
510 uint8_t *data = current_frame->data + current_frame->len / 2;
511 uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
// An odd running length or an odd byte count gets special treatment before the pairwise copy.
512 if (current_frame->len % 2 == 1) {
516 if (bytes % 2 == 1) {
519 ++current_frame->len;
522 memcpy_interleaved(data, data2, start, bytes);
523 current_frame->len += bytes;
525 memcpy(current_frame->data + current_frame->len, start, bytes);
526 current_frame->len += bytes;
// Debugging aid: prints a label followed by all 32 bytes of an AVX2 register
// as hex, lowest byte first. The extract index must be a compile-time
// constant, which is why the calls are written out instead of looped.
532 void avx2_dump(const char *name, __m256i n)
534 printf("%-10s:", name);
// Bytes 0-7.
535 printf(" %02x", _mm256_extract_epi8(n, 0));
536 printf(" %02x", _mm256_extract_epi8(n, 1));
537 printf(" %02x", _mm256_extract_epi8(n, 2));
538 printf(" %02x", _mm256_extract_epi8(n, 3));
539 printf(" %02x", _mm256_extract_epi8(n, 4));
540 printf(" %02x", _mm256_extract_epi8(n, 5));
541 printf(" %02x", _mm256_extract_epi8(n, 6));
542 printf(" %02x", _mm256_extract_epi8(n, 7));
// Bytes 8-15.
544 printf(" %02x", _mm256_extract_epi8(n, 8));
545 printf(" %02x", _mm256_extract_epi8(n, 9));
546 printf(" %02x", _mm256_extract_epi8(n, 10));
547 printf(" %02x", _mm256_extract_epi8(n, 11));
548 printf(" %02x", _mm256_extract_epi8(n, 12));
549 printf(" %02x", _mm256_extract_epi8(n, 13));
550 printf(" %02x", _mm256_extract_epi8(n, 14));
551 printf(" %02x", _mm256_extract_epi8(n, 15));
// Bytes 16-23.
553 printf(" %02x", _mm256_extract_epi8(n, 16));
554 printf(" %02x", _mm256_extract_epi8(n, 17));
555 printf(" %02x", _mm256_extract_epi8(n, 18));
556 printf(" %02x", _mm256_extract_epi8(n, 19));
557 printf(" %02x", _mm256_extract_epi8(n, 20));
558 printf(" %02x", _mm256_extract_epi8(n, 21));
559 printf(" %02x", _mm256_extract_epi8(n, 22));
560 printf(" %02x", _mm256_extract_epi8(n, 23));
// Bytes 24-31.
562 printf(" %02x", _mm256_extract_epi8(n, 24));
563 printf(" %02x", _mm256_extract_epi8(n, 25));
564 printf(" %02x", _mm256_extract_epi8(n, 26));
565 printf(" %02x", _mm256_extract_epi8(n, 27));
566 printf(" %02x", _mm256_extract_epi8(n, 28));
567 printf(" %02x", _mm256_extract_epi8(n, 29));
568 printf(" %02x", _mm256_extract_epi8(n, 30));
569 printf(" %02x", _mm256_extract_epi8(n, 31));
574 #ifndef HAS_MULTIVERSIONING
// Variant compiled when HAS_MULTIVERSIONING is not defined. It has the same
// signature as the SIMD fast path so that callers need no conditional code.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns `start` unchanged so that everything goes to the slow path.
576 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
578 // No fast path possible unless we have multiversioning.
582 #else // defined(HAS_MULTIVERSIONING)
// Forward declarations of the two target-specific versions of the copy loop
// (SSE 4.1 and AVX2), so that add_to_frame_fastpath() can call the function
// before the definitions appear.
584 __attribute__((target("sse4.1")))
585 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
587 __attribute__((target("avx2")))
588 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
590 // Does a memcpy and memchr in one to reduce processing time.
591 // Note that the benefit is somewhat limited if your L3 cache is small,
592 // as you'll (unfortunately) spend most of the time loading the data
595 // Complicated cases are left to the slow path; it basically stops copying
596 // up until the first instance of "sync_char" (usually a bit before, actually).
597 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
598 // data, and what we really need this for is the 00 00 ff ff marker in video data.
// This is the "default" target version, chosen when the CPU supports neither
// SSE 4.1 nor AVX2.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns `start` so that the caller falls back to the slow path.
599 __attribute__((target("default")))
600 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
602 // No fast path possible unless we have SSE 4.1 or higher.
// SIMD-capable version of the fast path. It validates the frame, trims the
// input range to what fits in the frame and to a 32-byte-aligned end, copies
// the unaligned head through add_to_frame(), and hands the aligned middle to
// add_to_frame_fastpath_core(). The result is a pointer into the input.
// NOTE(review): the early-return statements are not visible in this excerpt;
// presumably the result marks how far the input was consumed.
606 __attribute__((target("sse4.1", "avx2")))
607 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
609 if (current_frame->data == nullptr ||
610 current_frame->len > current_frame->size ||
614 size_t orig_bytes = limit - start;
// Inputs shorter than 128 bytes are singled out here; presumably they skip the fast path.
615 if (orig_bytes < 128) {
620 // Don't read more bytes than we can write.
621 limit = min(limit, start + (current_frame->size - current_frame->len));
623 // Align end to 32 bytes.
624 limit = (const uint8_t *)(intptr_t(limit) & ~31);
626 if (start >= limit) {
630 // Process [0,31] bytes, such that start gets aligned to 32 bytes.
631 const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
632 if (aligned_start != start) {
// Look for the sync byte in the unaligned head; copy only up to it if found.
633 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
634 if (sync_start == nullptr) {
635 add_to_frame(current_frame, "", start, aligned_start);
637 add_to_frame(current_frame, "", start, sync_start);
642 // Make the length a multiple of 64.
// The interleaved loops consume two vectors per iteration, hence the stricter length requirement.
643 if (current_frame->interleaved) {
644 if (((limit - aligned_start) % 64) != 0) {
647 assert(((limit - aligned_start) % 64) == 0);
650 return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
// AVX2 copy loop. Copies 32-byte vectors from [aligned_start, limit) into the
// frame while comparing every byte against sync_char. Interleaved frames are
// split into even bytes (data) and odd bytes (data2), 64 input bytes per
// iteration. Returns the input position reached.
// NOTE(review): the statements inside the `found` branches and the pointer
// increments are not visible in this excerpt; presumably the loop stops at
// the first vector that contains sync_char.
653 __attribute__((target("avx2")))
654 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
// The sync byte broadcast to all 32 lanes, for the byte-wise compare.
656 const __m256i needle = _mm256_set1_epi8(sync_char);
658 const __restrict __m256i *in = (const __m256i *)aligned_start;
659 if (current_frame->interleaved) {
// Output positions in the two planes; the first plane rounds up for an odd running length.
660 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
661 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
662 if (current_frame->len % 2 == 1) {
// Shuffle control: within each 128-bit lane, even-offset bytes go to the low half and odd-offset bytes to the high half.
666 __m256i shuffle_cw = _mm256_set_epi8(
667 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
668 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
669 while (in < (const __m256i *)limit) {
670 // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
671 __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
672 __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
// Non-zero wherever either input vector contains the sync byte.
674 __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
675 __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
676 __m256i found = _mm256_or_si256(found1, found2);
678 data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
679 data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
681 data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
682 data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
// Combine the low halves of both vectors into `lo` and the high halves into `hi`.
684 __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
685 __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
687 _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
688 _mm256_storeu_si256(out2, hi);
690 if (!_mm256_testz_si256(found, found)) {
// Account for the input bytes consumed by the interleaved loop.
698 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved frames: straight copy, one vector at a time.
700 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
701 while (in < (const __m256i *)limit) {
702 __m256i data = _mm256_load_si256(in);
703 _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
704 __m256i found = _mm256_cmpeq_epi8(data, needle);
705 if (!_mm256_testz_si256(found, found)) {
// The new length is derived from how far the output pointer advanced.
712 current_frame->len = (uint8_t *)out - current_frame->data;
715 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
716 return (const uint8_t *)in;
// SSE 4.1 copy loop; same contract as the AVX2 version but with 16-byte
// vectors. Interleaved frames consume 32 input bytes per iteration, writing
// even bytes to data and odd bytes to data2. Returns the input position reached.
// NOTE(review): the statements inside the `found` branches and the pointer
// increments are not visible in this excerpt.
719 __attribute__((target("sse4.1")))
720 const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
722 const __m128i needle = _mm_set1_epi8(sync_char);
724 const __m128i *in = (const __m128i *)aligned_start;
725 if (current_frame->interleaved) {
726 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
727 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
728 if (current_frame->len % 2 == 1) {
// Keeps the low byte of every 16-bit element, i.e. the bytes at even offsets.
732 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
733 while (in < (const __m128i *)limit) {
734 __m128i data1 = _mm_load_si128(in);
735 __m128i data2 = _mm_load_si128(in + 1);
// Separate even bytes (masked) from odd bytes (shifted down), then pack each set into one vector.
736 __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
737 __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
738 __m128i data1_hi = _mm_srli_epi16(data1, 8);
739 __m128i data2_hi = _mm_srli_epi16(data2, 8);
740 __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
741 _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used.
742 __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
743 _mm_storeu_si128(out2, hi);
744 __m128i found1 = _mm_cmpeq_epi8(data1, needle);
745 __m128i found2 = _mm_cmpeq_epi8(data2, needle);
746 if (!_mm_testz_si128(found1, found1) ||
747 !_mm_testz_si128(found2, found2)) {
755 current_frame->len += (uint8_t *)in - aligned_start;
// Non-interleaved frames: straight copy, one vector at a time.
757 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
758 while (in < (const __m128i *)limit) {
759 __m128i data = _mm_load_si128(in);
760 _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
761 __m128i found = _mm_cmpeq_epi8(data, needle);
762 if (!_mm_testz_si128(found, found)) {
769 current_frame->len = (uint8_t *)out - current_frame->data;
772 //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
773 return (const uint8_t *)in;
776 #endif // defined(HAS_MULTIVERSIONING)
// Walks all isochronous packets of a completed transfer and appends their
// payload to *current_frame. Each time the sync pattern is found, the data
// before it is appended, the pattern is skipped and start_callback is called
// with a pointer to the bytes that follow.
// NOTE(review): the sync_length parameter and the declaration of `offset`
// are on lines not visible in this excerpt.
778 void decode_packs(const libusb_transfer *xfr,
779 const char *sync_pattern,
781 FrameAllocator::Frame *current_frame,
782 const char *frame_type_name,
783 function<void(const uint8_t *start)> start_callback)
786 for (int i = 0; i < xfr->num_iso_packets; i++) {
787 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
789 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
790 fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
// Only the bytes actually received (actual_length) are scanned.
795 const uint8_t *start = xfr->buffer + offset;
796 const uint8_t *limit = start + pack->actual_length;
797 while (start < limit) { // Usually runs only one iteration.
// The fast path is keyed on the first byte of the sync pattern only.
798 start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
799 if (start == limit) break;
800 assert(start < limit);
// Slow path: search for the complete sync pattern in what remains.
802 const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
803 if (start_next_frame == nullptr) {
804 // add the rest of the buffer
805 add_to_frame(current_frame, frame_type_name, start, limit);
808 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
809 start = start_next_frame + sync_length; // skip sync
810 start_callback(start);
814 dump_pack(xfr, offset, pack);
// Packets are laid out in the buffer at their full length, not at the received length.
816 offset += pack->length;
// libusb completion callback for all transfers. Failed transfers are freed, a
// vanished device is reported once through card_disconnected_callback,
// isochronous data is decoded into audio or video frames, control transfers
// cycle through the card's registers, and the transfer is then resubmitted.
// NOTE(review): several lines (returns, preprocessor guards around the debug
// printing, the declaration of `i`) are not visible in this excerpt.
820 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
822 if (xfr->status != LIBUSB_TRANSFER_COMPLETED &&
823 xfr->status != LIBUSB_TRANSFER_NO_DEVICE) {
824 fprintf(stderr, "error: transfer status %d\n", xfr->status);
825 libusb_free_transfer(xfr);
// user_data carries the owning BMUSBCapture instance.
829 assert(xfr->user_data != nullptr);
830 BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
832 if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) {
// Several transfers can fail at once; the flag keeps the notification to one.
833 if (!usb->disconnected) {
834 fprintf(stderr, "Device went away, stopping transfers.\n");
835 usb->disconnected = true;
836 if (usb->card_disconnected_callback) {
837 usb->card_disconnected_callback();
840 // Don't reschedule the transfer; the loop will stop by itself.
844 if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
// Endpoint 0x84 is decoded as audio; the other branch decodes video.
845 if (xfr->endpoint == 0x84) {
846 decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
848 decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
850 // Update the transfer with the new assumed width, if we're in the process of changing formats.
851 change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
854 if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
855 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
856 uint8_t *buf = libusb_control_transfer_get_data(xfr);
// NOTE(review): `setup` is only declared in the commented-out line above, so these prints are presumably compiled out.
858 if (setup->wIndex == 44) {
859 printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
861 printf("read register %2d: 0x%02x%02x%02x%02x\n",
862 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
// Store the four bytes just read and move on to the next register, wrapping at the end.
865 memcpy(usb->register_file + usb->current_register, buf, 4);
866 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
867 if (usb->current_register == 0) {
868 // read through all of them
869 printf("register dump:");
870 for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
871 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
// Prepare the same transfer to read the next register.
875 libusb_fill_control_setup(xfr->buffer,
876 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
877 /*index=*/usb->current_register, /*length=*/4);
882 printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
883 for (i = 0; i < xfr->actual_length; i++) {
884 printf("%02x", xfr->buffer[i]);
// Put the transfer back in flight.
894 int rc = libusb_submit_transfer(xfr);
896 fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
// libusb hotplug callback. When a card-connected callback is registered and
// the device is one of the two supported Blackmagic products, the device is
// passed to that callback, which takes over its reference. Otherwise the
// reference is dropped here.
// NOTE(review): the return statements are not visible in this excerpt.
901 int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data)
903 if (card_connected_callback != nullptr) {
904 libusb_device_descriptor desc;
905 if (libusb_get_device_descriptor(dev, &desc) < 0) {
906 fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev);
907 libusb_unref_device(dev);
// Product IDs 0xbd3b and 0xbd4f are the ones named in get_product_name().
911 if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) ||
912 (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
913 card_connected_callback(dev); // Callback takes ownership.
917 libusb_unref_device(dev);
921 void BMUSBCapture::usb_thread_func()
924 memset(¶m, 0, sizeof(param));
925 param.sched_priority = 1;
926 if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
927 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
929 pthread_setname_np(pthread_self(), "bmusb_usb_drv");
930 while (!should_quit) {
931 timeval sec { 1, 0 };
932 int rc = libusb_handle_events_timeout(nullptr, &sec);
933 if (rc != LIBUSB_SUCCESS)
// Describes one supported card found on the USB bus.
// NOTE(review): the product, bus and port members used elsewhere in the file
// are declared on lines not visible in this excerpt.
940 struct USBCardDevice {
// libusb device for the card; the users of find_all_cards() unref it when done.
943 libusb_device *device;
// Maps a supported USB product ID to a human-readable product name.
// NOTE(review): the branch for any other product ID is not visible in this
// excerpt.
946 const char *get_product_name(uint16_t product)
948 if (product == 0xbd3b) {
949 return "Intensity Shuttle";
950 } else if (product == 0xbd4f) {
951 return "UltraStudio SDI";
// Builds the one-line description of a card, e.g.
// "USB card 0: Bus 001 Device 002 Intensity Shuttle".
// Note that the number printed after "Device" is the value passed as `port`.
// NOTE(review): the buffer declaration and the return statement are not
// visible in this excerpt.
958 string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product)
960 const char *product_name = get_product_name(product);
963 snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s",
964 id, bus, port, product_name);
// Enumerates all USB devices and collects the supported Blackmagic cards,
// sorted by product ID, then bus, then port. Devices that do not match are
// unreferenced here; the collected ones keep their reference because the
// device list itself is freed without unreferencing its entries.
// NOTE(review): the error-path statements, the middle comparison of the sort
// and the return statement are not visible in this excerpt.
968 vector<USBCardDevice> find_all_cards()
970 libusb_device **devices;
971 ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
972 if (num_devices == -1) {
973 fprintf(stderr, "Error finding USB devices\n");
976 vector<USBCardDevice> found_cards;
977 for (ssize_t i = 0; i < num_devices; ++i) {
978 libusb_device_descriptor desc;
979 if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
980 fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
984 uint8_t bus = libusb_get_bus_number(devices[i]);
985 uint8_t port = libusb_get_port_number(devices[i]);
// Same vendor/product pairs as accepted by the hotplug callback.
987 if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) &&
988 !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) {
989 libusb_unref_device(devices[i]);
993 found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
// Second argument 0: free the list but do not unref the devices in it.
995 libusb_free_device_list(devices, 0);
997 // Sort the devices to get a consistent ordering.
998 sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
999 if (a.product != b.product)
1000 return a.product < b.product;
1002 return a.bus < b.bus;
1003 return a.port < b.port;
// Opens the card at position card_index in the sorted list of detected
// cards. Every detected card is listed on stderr, and the description of the
// selected one is stored in *description. The device references obtained from
// find_all_cards() are dropped once the card has been opened.
// NOTE(review): the statements that follow the two error messages and the
// return statement are not visible in this excerpt. If those error paths
// return to the caller, the device references would be leaked — confirm.
1009 libusb_device_handle *open_card(int card_index, string *description)
1011 vector<USBCardDevice> found_cards = find_all_cards();
1013 for (size_t i = 0; i < found_cards.size(); ++i) {
1014 string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
1015 fprintf(stderr, "%s\n", tmp_description.c_str());
1016 if (i == size_t(card_index)) {
1017 *description = tmp_description;
// The cast to size_t makes a negative index fail this range check as well.
1021 if (size_t(card_index) >= found_cards.size()) {
1022 fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
1026 libusb_device_handle *devh;
1027 int rc = libusb_open(found_cards[card_index].device, &devh);
1029 fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
// Release the references that find_all_cards() left on the devices.
1033 for (size_t i = 0; i < found_cards.size(); ++i) {
1034 libusb_unref_device(found_cards[i].device);
1040 libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description)
1042 uint8_t bus = libusb_get_bus_number(dev);
1043 uint8_t port = libusb_get_port_number(dev);
1045 libusb_device_descriptor desc;
1046 if (libusb_get_device_descriptor(dev, &desc) < 0) {
1047 fprintf(stderr, "Error getting device descriptor for device %p\n", dev);
1051 *description = get_card_description(card_index, bus, port, desc.idProduct);
1053 libusb_device_handle *devh;
1054 int rc = libusb_open(dev, &devh);
1056 fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc));
1065 unsigned BMUSBCapture::num_cards()
1067 int rc = libusb_init(nullptr);
1069 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
1073 vector<USBCardDevice> found_cards = find_all_cards();
1074 unsigned ret = found_cards.size();
1075 for (size_t i = 0; i < found_cards.size(); ++i) {
1076 libusb_unref_device(found_cards[i].device);
1081 void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
1083 current_pixel_format = pixel_format;
1084 update_capture_mode();
// Brings the card up for capture. In order, as far as visible here:
//   1. Installs default MallocFrameAllocators for video/audio if the user
//      has not supplied any, and starts the dequeue thread.
//   2. Initializes libusb and opens the device (by index, or from the
//      libusb_device given in <dev>).
//   3. Selects configuration 1, claims interface 0 and switches alternate
//      settings.
//   4. Sends the current capture mode and a short table of vendor control
//      requests (one of which yields the firmware version).
//   5. Allocates isochronous transfers for endpoints 3 and 4 and stores them
//      in iso_xfrs; they are not submitted here.
//
// NOTE(review): this listing does not show every line (error-check
// conditions, closing braces and possibly preprocessor guards are absent).
// Several debug-looking sections below may be conditionally compiled out —
// confirm against the full file before relying on them being live.
1087 void BMUSBCapture::configure_card()
// Fall back to our own allocators when none have been set.
1089 if (video_frame_allocator == nullptr) {
1090 owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
1091 set_video_frame_allocator(owned_video_frame_allocator.get());
1093 if (audio_frame_allocator == nullptr) {
1094 owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
1095 set_audio_frame_allocator(owned_audio_frame_allocator.get());
// Start the thread running dequeue_thread_func() before any USB traffic is set up.
1097 dequeue_thread_should_quit = false;
1098 dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
1101 struct libusb_transfer *xfr;
1103 rc = libusb_init(nullptr);
1105 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
// Open either by card index, or from an explicitly given device (whose
// reference we drop once it has been opened).
1109 if (dev == nullptr) {
1110 devh = open_card(card_index, &description);
1112 devh = open_card(card_index, dev, &description);
1113 libusb_unref_device(dev);
1116 fprintf(stderr, "Error finding USB device\n");
1120 libusb_config_descriptor *config;
1121 rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
1123 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
// Dump of interfaces / alternate settings / endpoint addresses.
// NOTE(review): looks like debug output; may be compiled out — confirm.
1128 printf("%d interface\n", config->bNumInterfaces);
1129 for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
1130 printf(" interface %d\n", interface_number);
1131 const libusb_interface *interface = &config->interface[interface_number];
1132 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
1133 const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
1134 printf(" alternate setting %d\n", interface_desc->bAlternateSetting);
1135 for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
1136 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
1137 printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress);
1143 rc = libusb_set_configuration(devh, /*configuration=*/1);
1145 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
1149 rc = libusb_claim_interface(devh, 0);
1151 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
1155 // Alternate setting 1 is output, alternate setting 2 is input.
1156 // Card is reset when switching alternates, so the driver uses
1157 // this “double switch” when it wants to reset.
1159 // There's also alternate settings 3 and 4, which seem to be
1160 // like 1 and 2 except they advertise less bandwidth needed.
1161 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1163 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1164 if (rc == LIBUSB_ERROR_NOT_FOUND) {
1165 fprintf(stderr, "This is usually because the card came up in USB2 mode.\n");
1166 fprintf(stderr, "In particular, this tends to happen if you boot up with the\n");
1167 fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n");
1171 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
1173 fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
// NOTE(review): switching back to alternate 1 (output, per the comment
// above) right after selecting input would be surprising for a capture
// driver; presumably this call and the interface 3 claim below are guarded
// out — confirm.
1177 rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
1179 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
1185 rc = libusb_claim_interface(devh, 3);
1187 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
// Reverse-engineering notes on the card's registers (numbers are the
// <index> of vendor requests 214/215):
1193 // 44 is some kind of timer register (first 16 bits count upwards)
1194 // 24 is some sort of watchdog?
1195 // you can seemingly set it to 0x73c60001 and that bit will eventually disappear
1196 // (or will go to 0x73c60010?), also seen 0x73c60100
1197 // 12 also changes all the time, unclear why
1198 // 16 seems to be autodetected mode somehow
1199 // -- this is e00115e0 after reset?
1200 // ed0115e0 after mode change [to output?]
1201 // 2d0015e0 after more mode change [to input]
1202 // ed0115e0 after more mode change
1203 // 2d0015e0 after more mode change
1205 // 390115e0 seems to indicate we have signal
1206 // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
1208 // 200015e0 on startup
1209 // changes to 250115e0 when we sync to the signal
1211 // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
1213 // Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
1215 // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
1216 // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
1218 // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
1219 // perhaps some of them are related to analog output?
1221 // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
1222 // but the driver sets it to 0x8036802a at some point.
1224 // all of this is on request 214/215. other requests (192, 219,
1225 // 222, 223, 224) are used for firmware upgrade. Probably best to
1226 // stay out of it unless you know what you're doing.
1230 // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
1233 // 0x01 - stable signal
1234 // 0x04 - deep color
1235 // 0x08 - unknown (audio??)
// Now that devh is set, push the selected input/pixel format to the card.
1239 update_capture_mode();
// Table of vendor control requests issued in order; columns are used below
// as endpoint (direction), request, index and data.
1247 static const ctrl ctrls[] = {
1248 { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
1249 { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
1251 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
1252 //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
1253 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
1254 { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
1257 for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
// The payload is sent in network byte order (htonl); for IN requests the
// same buffer receives the reply.
1258 uint32_t flipped = htonl(ctrls[req].data);
1259 static uint8_t value[4];
1260 memcpy(value, &flipped, sizeof(flipped));
1261 int size = sizeof(value);
1262 //if (ctrls[req].request == 215) size = 0;
1263 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
1264 /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
1266 fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
// A full four-byte read of register 16 has the firmware version in its last two bytes.
1270 if (ctrls[req].index == 16 && rc == 4) {
1271 printf("Card firmware version: 0x%02x%02x\n", value[2], value[3]);
1275 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
1276 for (int i = 0; i < rc; ++i) {
1277 printf("%02x", value[i]);
// NOTE(review): the register-polling code and the three asynchronous control
// transfers below (timer register 44, register 24, register dump) look like
// debugging aids; they use function-local static buffers and completion
// flags. Presumably compiled out — confirm.
1286 static int my_index = 0;
1287 static uint8_t value[4];
1288 int size = sizeof(value);
1289 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
1290 /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
1292 fprintf(stderr, "Error on control\n");
1295 printf("rc=%d index=%d: 0x", rc, my_index);
1296 for (int i = 0; i < rc; ++i) {
1297 printf("%02x", value[i]);
1304 // set up an asynchronous transfer of the timer register
1305 static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
1306 static int completed = 0;
1308 xfr = libusb_alloc_transfer(0);
1309 libusb_fill_control_setup(cmdbuf,
1310 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1311 /*index=*/44, /*length=*/4);
1312 libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
1313 xfr->user_data = this;
1314 libusb_submit_transfer(xfr);
1316 // set up an asynchronous transfer of register 24
1317 static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
1318 static int completed2 = 0;
1320 xfr = libusb_alloc_transfer(0);
1321 libusb_fill_control_setup(cmdbuf2,
1322 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1323 /*index=*/24, /*length=*/4);
1324 libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
1325 xfr->user_data = this;
1326 libusb_submit_transfer(xfr);
1329 // set up an asynchronous transfer of the register dump
1330 static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
1331 static int completed3 = 0;
1333 xfr = libusb_alloc_transfer(0);
1334 libusb_fill_control_setup(cmdbuf3,
1335 LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
1336 /*index=*/current_register, /*length=*/4);
1337 libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
1338 xfr->user_data = this;
1339 //libusb_submit_transfer(xfr);
1341 //audiofp = fopen("audio.raw", "wb");
1343 // set up isochronous transfers for audio and video
// Endpoint numbers 3 and 4 each get num_transfers transfers.
// NOTE(review): the branch conditions choosing between the two sizing
// paths below are not visible in this listing; presumably one path is for
// video and the other for audio — confirm which endpoint is which.
1344 for (int e = 3; e <= 4; ++e) {
1345 int num_transfers = 6;
1346 for (int i = 0; i < num_transfers; ++i) {
1348 int num_iso_pack, size;
1350 // Allocate for minimum width (because that will give us the most
1351 // number of packets, so we don't need to reallocate), but we'll
1352 // default to 720p for the first frame.
1353 size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
1354 num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
1355 buf_size = USB_VIDEO_TRANSFER_SIZE;
1359 buf_size = num_iso_pack * size;
1361 int num_bytes = num_iso_pack * size;
1362 assert(size_t(num_bytes) <= buf_size);
// Prefer memory from libusb_dev_mem_alloc() when the libusb headers are new
// enough; otherwise, or if that allocation fails, fall back to the heap.
1363 #if LIBUSB_API_VERSION >= 0x01000105
1364 uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes);
1366 uint8_t *buf = nullptr;
1368 if (buf == nullptr) {
1369 fprintf(stderr, "Failed to allocate persistent DMA memory ");
1370 #if LIBUSB_API_VERSION >= 0x01000105
1371 fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n");
1373 fprintf(stderr, "(compiled against too old libusb-1.0).\n");
1375 fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n");
1376 buf = new uint8_t[num_bytes];
1379 xfr = libusb_alloc_transfer(num_iso_pack);
1381 fprintf(stderr, "oom\n");
1385 int ep = LIBUSB_ENDPOINT_IN | e;
1386 libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size,
1387 num_iso_pack, cb_xfr, nullptr, 0);
1388 libusb_set_iso_packet_lengths(xfr, size);
// user_data is set after the fill call, which was given nullptr for it.
1389 xfr->user_data = this;
// Re-split the transfer into packets sized for the currently assumed frame width.
1392 change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
// Keep the transfer for start_bm_capture(), which submits everything in iso_xfrs.
1395 iso_xfrs.push_back(xfr);
// Starts capturing by submitting every transfer stored in iso_xfrs
// (prepared by configure_card()) to libusb. A failed submission is reported
// on stderr together with the endpoint, a running number and the libusb
// error name.
// NOTE(review): the declaration/increment of `i` and the error-check
// condition are not visible in this listing — confirm against the full file.
1400 void BMUSBCapture::start_bm_capture()
1403 for (libusb_transfer *xfr : iso_xfrs) {
1404 int rc = libusb_submit_transfer(xfr);
1407 //printf("num_bytes=%d\n", num_bytes);
1408 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
1409 xfr->endpoint, i, libusb_error_name(rc));
// NOTE(review): releasing interface 0 and shutting libusb down right after
// submitting the transfers would stop the capture just started; presumably
// these teardown calls are compiled out or belong to an error path that is
// not visible here — confirm.
1416 libusb_release_interface(devh, 0);
1420 libusb_exit(nullptr);
1425 void BMUSBCapture::stop_dequeue_thread()
1427 dequeue_thread_should_quit = true;
1428 queues_not_empty.notify_all();
1429 dequeue_thread.join();
1432 void BMUSBCapture::start_bm_thread()
1434 // Devices leaving are discovered by seeing the isochronous packets
1435 // coming back with errors, so only care about devices joining.
1436 if (card_connected_callback != nullptr) {
1437 if (libusb_hotplug_register_callback(
1438 nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
1439 USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
1440 &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
1441 fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
1446 should_quit = false;
1447 usb_thread = thread(&BMUSBCapture::usb_thread_func);
// Counterpart of start_bm_thread().
// NOTE(review): the body is not visible in this listing; presumably it sets
// should_quit and joins usb_thread — confirm against the full file.
1450 void BMUSBCapture::stop_bm_thread()
1456 map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
1458 // The USB3 cards autodetect, and seem to have no provision for forcing modes.
1459 VideoMode auto_mode;
1460 auto_mode.name = "Autodetect";
1461 auto_mode.autodetect = true;
1462 return {{ 0, auto_mode }};
1465 uint32_t BMUSBCapture::get_current_video_mode() const
1467 return 0; // Matches get_available_video_modes().
1470 void BMUSBCapture::set_video_mode(uint32_t video_mode_id)
1472 assert(video_mode_id == 0); // Matches get_available_video_modes().
1475 std::map<uint32_t, std::string> BMUSBCapture::get_available_video_inputs() const
1478 { 0x00000000, "HDMI/SDI" },
1479 { 0x02000000, "Component" },
1480 { 0x04000000, "Composite" },
1481 { 0x06000000, "S-video" }
1485 void BMUSBCapture::set_video_input(uint32_t video_input_id)
1487 assert((video_input_id & ~0x06000000) == 0);
1488 current_video_input = video_input_id;
1489 update_capture_mode();
1492 std::map<uint32_t, std::string> BMUSBCapture::get_available_audio_inputs() const
1495 { 0x00000000, "Embedded" },
1496 { 0x10000000, "Analog" }
1500 void BMUSBCapture::set_audio_input(uint32_t audio_input_id)
1502 assert((audio_input_id & ~0x10000000) == 0);
1503 current_audio_input = audio_input_id;
1504 update_capture_mode();
1507 void BMUSBCapture::update_capture_mode()
1509 if (devh == nullptr) {
1513 // Clearing the 0x08000000 bit seems to change the capture format (other source?).
1514 uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
1515 if (current_pixel_format == PixelFormat_8BitYCbCr) {
1516 mode |= htonl(0x20000000);
1518 assert(current_pixel_format == PixelFormat_10BitYCbCr);
1521 int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
1522 /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);
1524 fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc));
1529 } // namespace bmusb