X-Git-Url: https://git.sesse.net/?p=bmusb;a=blobdiff_plain;f=bmusb.cpp;h=0169d062dad8d1823ed441e29cc3938eee2c2e09;hp=85b92a6186c2e90debb70b4cc3fa85a0598d5eb7;hb=HEAD;hpb=30a75fd8110601c89ecc7c1a0832a96878917cd4 diff --git a/bmusb.cpp b/bmusb.cpp index 85b92a6..19a9da1 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,25 +1,32 @@ -// Intensity Shuttle USB3 prototype capture driver, v0.3 -// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable +// Intensity Shuttle USB3 capture driver, v0.7.8 +// Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) +#define HAS_MULTIVERSIONING 1 +#endif + #include #include #include +#include #include +#include #include #include #include #include #include -#ifdef __SSE4_1__ +#if HAS_MULTIVERSIONING #include #endif -#include "bmusb.h" +#include "bmusb/bmusb.h" #include #include +#include #include #include #include @@ -32,8 +39,10 @@ #include using namespace std; +using namespace std::chrono; using namespace std::placeholders; +#define USB_VENDOR_BLACKMAGIC 0x1edb #define MIN_WIDTH 640 #define HEADER_SIZE 44 //#define HEADER_SIZE 0 @@ -42,6 +51,11 @@ using namespace std::placeholders; #define FRAME_SIZE (8 << 20) // 8 MB. #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB. +namespace bmusb { + +card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr; +bool BMUSBCapture::hotplug_existing_devices = false; + namespace { FILE *audiofp; @@ -49,14 +63,24 @@ FILE *audiofp; thread usb_thread; atomic should_quit; -int find_xfer_size_for_width(int width) +int v210_stride(int width) +{ + return (width + 5) / 6 * 4 * sizeof(uint32_t); +} + +int find_xfer_size_for_width(PixelFormat pixel_format, int width) { // Video seems to require isochronous packets scaled with the width; // seemingly six lines is about right, rounded up to the required 1kB // multiple. - int size = width * 2 * 6; // Note that for 10-bit input, you'll need to increase size accordingly. - //size = size * 4 / 3; + int stride; + if (pixel_format == PixelFormat_10BitYCbCr) { + stride = v210_stride(width); + } else { + stride = width * sizeof(uint16_t); + } + int size = stride * 6; if (size % 1024 != 0) { size &= ~1023; size += 1024; @@ -64,10 +88,10 @@ int find_xfer_size_for_width(int width) return size; } -void change_xfer_size_for_width(int width, libusb_transfer *xfr) +void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr) { assert(width >= MIN_WIDTH); - size_t size = find_xfer_size_for_width(width); + size_t size = find_xfer_size_for_width(pixel_format, width); int num_iso_pack = xfr->length / size; if (num_iso_pack != xfr->num_iso_packets || size != xfr->iso_packet_desc[0].length) { @@ -76,29 +100,172 @@ void change_xfer_size_for_width(int width, libusb_transfer *xfr) } } -} // namespace +struct VideoFormatEntry { + uint16_t normalized_video_format; + unsigned width, height, second_field_start; + unsigned extra_lines_top, extra_lines_bottom; + unsigned frame_rate_nom, frame_rate_den; + bool interlaced; +}; -FrameAllocator::~FrameAllocator() {} +// Get details for the given video format; returns false if detection was incomplete. +bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format) +{ + decoded_video_format->id = video_format; + decoded_video_format->interlaced = false; -// Audio is more important than video, and also much cheaper. -// By having many more audio frames available, hopefully if something -// starts to drop, we'll have CPU load go down (from not having to -// process as much video) before we have to drop audio. -#define NUM_QUEUED_VIDEO_FRAMES 16 -#define NUM_QUEUED_AUDIO_FRAMES 64 + // TODO: Add these for all formats as we find them. + decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0; + + if (video_format == 0x0800) { + // No video signal. These green pseudo-frames seem to come at about 30.13 Hz. + // It's a strange thing, but what can you do. + decoded_video_format->width = 720; + decoded_video_format->height = 525; + decoded_video_format->stride = 720 * 2; + decoded_video_format->extra_lines_top = 0; + decoded_video_format->extra_lines_bottom = 0; + decoded_video_format->frame_rate_nom = 3013; + decoded_video_format->frame_rate_den = 100; + decoded_video_format->has_signal = false; + return true; + } + if ((video_format & 0xe000) != 0xe000) { + printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", + video_format); + decoded_video_format->width = 0; + decoded_video_format->height = 0; + decoded_video_format->stride = 0; + decoded_video_format->extra_lines_top = 0; + decoded_video_format->extra_lines_bottom = 0; + decoded_video_format->frame_rate_nom = 60; + decoded_video_format->frame_rate_den = 1; + decoded_video_format->has_signal = false; + return false; + } -class MallocFrameAllocator : public FrameAllocator { -public: - MallocFrameAllocator(size_t frame_size, size_t num_queued_frames); - Frame alloc_frame() override; - void release_frame(Frame frame) override; + decoded_video_format->has_signal = true; -private: - size_t frame_size; + // NTSC (480i59.94, I suppose). A special case, see below. + if ((video_format & ~0x0800) == 0xe101 || + (video_format & ~0x0800) == 0xe1c1 || + (video_format & ~0x0800) == 0xe001) { + decoded_video_format->width = 720; + decoded_video_format->height = 480; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } + decoded_video_format->extra_lines_top = 17; + decoded_video_format->extra_lines_bottom = 28; + decoded_video_format->frame_rate_nom = 30000; + decoded_video_format->frame_rate_den = 1001; + decoded_video_format->second_field_start = 280; + decoded_video_format->interlaced = true; + return true; + } - mutex freelist_mutex; - stack> freelist; // All of size . -}; + // PAL (576i50, I suppose). A special case, see below. + if ((video_format & ~0x0800) == 0xe109 || + (video_format & ~0x0800) == 0xe1c9 || + (video_format & ~0x0800) == 0xe009 || + (video_format & ~0x0800) == 0xe3e9 || + (video_format & ~0x0800) == 0xe3e1) { + decoded_video_format->width = 720; + decoded_video_format->height = 576; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } + decoded_video_format->extra_lines_top = 22; + decoded_video_format->extra_lines_bottom = 27; + decoded_video_format->frame_rate_nom = 25; + decoded_video_format->frame_rate_den = 1; + decoded_video_format->second_field_start = 335; + decoded_video_format->interlaced = true; + return true; + } + + // 0x8 seems to be a flag about availability of deep color on the input, + // except when it's not (e.g. it's the only difference between NTSC + // and PAL). Rather confusing. But we clear it here nevertheless, because + // usually it doesn't mean anything. 0x0800 appears to be 8-bit input + // (as opposed to 10-bit). + // + // 0x4 is a flag I've only seen from the D4. I don't know what it is. + uint16_t normalized_video_format = video_format & ~0xe80c; + constexpr VideoFormatEntry entries[] = { + { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). + { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). + { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. + { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. + { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. + { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30. + { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60. + { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97. + { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94. + { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25. + { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50. + { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24. + { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98. + }; + for (const VideoFormatEntry &entry : entries) { + if (normalized_video_format == entry.normalized_video_format) { + decoded_video_format->width = entry.width; + decoded_video_format->height = entry.height; + if (video_format & 0x0800) { + decoded_video_format->stride = entry.width * 2; + } else { + decoded_video_format->stride = v210_stride(entry.width); + } + decoded_video_format->second_field_start = entry.second_field_start; + decoded_video_format->extra_lines_top = entry.extra_lines_top; + decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; + decoded_video_format->frame_rate_nom = entry.frame_rate_nom; + decoded_video_format->frame_rate_den = entry.frame_rate_den; + decoded_video_format->interlaced = entry.interlaced; + return true; + } + } + + printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format); + decoded_video_format->width = 1280; + decoded_video_format->height = 720; + decoded_video_format->stride = 1280 * 2; + decoded_video_format->frame_rate_nom = 60; + decoded_video_format->frame_rate_den = 1; + return false; +} + +// There are seemingly no direct indicators of sample rate; you just get +// one frame's worth and have to guess from that. +int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate) +{ + size_t num_samples = len / 3 / 8; + size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den; + + // See if we match or are very close to any of the mandatory HDMI sample rates. + const int candidate_sample_rates[] = { 32000, 44100, 48000 }; + for (int rate : candidate_sample_rates) { + if (abs(int(num_samples_per_second) - rate) <= 100) { + return rate; + } + } + + fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n", + num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate); + return default_rate; +} + +} // namespace + +FrameAllocator::~FrameAllocator() {} MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames) : frame_size(frame_size) @@ -175,14 +342,21 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) void dump_audio_block(uint8_t *audio_start, size_t audio_len) { - fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + if (audiofp != nullptr) { + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + } } void BMUSBCapture::dequeue_thread_func() { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index); + pthread_setname_np(pthread_self(), thread_name); + if (has_dequeue_callbacks) { dequeue_init_callback(); } + size_t last_sample_rate = 48000; while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); }); @@ -194,6 +368,7 @@ void BMUSBCapture::dequeue_thread_func() AudioFormat audio_format; audio_format.bits_per_sample = 24; audio_format.num_channels = 8; + audio_format.sample_rate = last_sample_rate; if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) { printf("Video block 0x%04x without corresponding audio block, dropping.\n", video_timecode); @@ -208,8 +383,14 @@ void BMUSBCapture::dequeue_thread_func() pending_audio_frames.pop_front(); lock.unlock(); audio_format.id = audio_frame.format; + + // Use the video format of the pending frame. + QueuedFrame video_frame = pending_video_frames.front(); + VideoFormat video_format; + decode_video_format(video_frame.format, &video_format); + frame_callback(audio_timecode, - FrameAllocator::Frame(), 0, VideoFormat(), + FrameAllocator::Frame(), 0, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); } else { QueuedFrame video_frame = pending_video_frames.front(); @@ -228,10 +409,16 @@ void BMUSBCapture::dequeue_thread_func() VideoFormat video_format; audio_format.id = audio_frame.format; if (decode_video_format(video_frame.format, &video_format)) { + if (audio_frame.frame.len != 0) { + audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate); + last_sample_rate = audio_format.sample_rate; + } frame_callback(video_timecode, video_frame.frame, HEADER_SIZE, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); } else { + video_frame_allocator->release_frame(video_frame.frame); + audio_format.sample_rate = last_sample_rate; frame_callback(video_timecode, FrameAllocator::Frame(), 0, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); @@ -249,6 +436,8 @@ void BMUSBCapture::start_new_frame(const uint8_t *start) uint16_t timecode = (start[1] << 8) | start[0]; if (current_video_frame.len > 0) { + current_video_frame.received_timestamp = steady_clock::now(); + // If format is 0x0800 (no signal), add a fake (empty) audio // frame to get it out of the queue. // TODO: Figure out if there are other formats that come with @@ -292,11 +481,12 @@ void BMUSBCapture::start_new_audio_block(const uint8_t *start) uint16_t format = (start[3] << 8) | start[2]; uint16_t timecode = (start[1] << 8) | start[0]; if (current_audio_frame.len > 0) { + current_audio_frame.received_timestamp = steady_clock::now(); //dump_audio_block(); queue_frame(format, timecode, current_audio_frame, &pending_audio_frames); } - //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n", - // format, timecode, read_current_audio_block); + //printf("Found audio block start, format 0x%04x timecode 0x%04x\n", + // format, timecode); current_audio_frame = audio_frame_allocator->alloc_frame(); } @@ -348,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } //dump_frame(); } else { + if (current_frame->data_copy != nullptr) { + memcpy(current_frame->data_copy + current_frame->len, start, bytes); + } if (current_frame->interleaved) { uint8_t *data = current_frame->data + current_frame->len / 2; uint8_t *data2 = current_frame->data2 + current_frame->len / 2; @@ -370,8 +563,6 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } } -#ifdef __SSE4_1__ - #if 0 void avx2_dump(const char *name, __m256i n) { @@ -415,6 +606,22 @@ void avx2_dump(const char *name, __m256i n) } #endif +#ifndef HAS_MULTIVERSIONING + +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + // No fast path possible unless we have multiversioning. + return start; +} + +#else // defined(HAS_MULTIVERSIONING) + +__attribute__((target("sse4.1"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char); + +__attribute__((target("avx2"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char); + // Does a memcpy and memchr in one to reduce processing time. // Note that the benefit is somewhat limited if your L3 cache is small, // as you'll (unfortunately) spend most of the time loading the data @@ -424,6 +631,14 @@ void avx2_dump(const char *name, __m256i n) // up until the first instance of "sync_char" (usually a bit before, actually). // This is fine, since 0x00 bytes shouldn't really show up in normal picture // data, and what we really need this for is the 00 00 ff ff marker in video data. +__attribute__((target("default"))) +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + // No fast path possible unless we have SSE 4.1 or higher. + return start; +} + +__attribute__((target("sse4.1", "avx2"))) const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) { if (current_frame->data == nullptr || @@ -467,9 +682,15 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const assert(((limit - aligned_start) % 64) == 0); } -#if __AVX2__ + return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char); +} + +__attribute__((target("avx2"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char) +{ const __m256i needle = _mm256_set1_epi8(sync_char); + size_t bytes_copied; const __restrict __m256i *in = (const __m256i *)aligned_start; if (current_frame->interleaved) { __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); @@ -510,9 +731,10 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m256i *out = (__m256i *)old_end; while (in < (const __m256i *)limit) { __m256i data = _mm256_load_si256(in); _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. @@ -524,12 +746,26 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; } -#else + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); + } + current_frame->len += bytes_copied; + + //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); + return (const uint8_t *)in; +} + +__attribute__((target("sse4.1"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char) +{ const __m128i needle = _mm_set1_epi8(sync_char); const __m128i *in = (const __m128i *)aligned_start; + size_t bytes_copied; if (current_frame->interleaved) { __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); @@ -560,9 +796,10 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m128i *out = (__m128i *)old_end; while (in < (const __m128i *)limit) { __m128i data = _mm_load_si128(in); _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. @@ -574,15 +811,20 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; } -#endif + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); + } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); - return (const uint8_t *)in; } -#endif + +#endif // defined(HAS_MULTIVERSIONING) void decode_packs(const libusb_transfer *xfr, const char *sync_pattern, @@ -604,11 +846,9 @@ void decode_packs(const libusb_transfer *xfr, const uint8_t *start = xfr->buffer + offset; const uint8_t *limit = start + pack->actual_length; while (start < limit) { // Usually runs only one iteration. -#ifdef __SSE4_1__ start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); if (start == limit) break; assert(start < limit); -#endif const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length); if (start_next_frame == nullptr) { @@ -630,8 +870,9 @@ void decode_packs(const libusb_transfer *xfr, void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) { - if (xfr->status != LIBUSB_TRANSFER_COMPLETED) { - fprintf(stderr, "transfer status %d\n", xfr->status); + if (xfr->status != LIBUSB_TRANSFER_COMPLETED && + xfr->status != LIBUSB_TRANSFER_NO_DEVICE) { + fprintf(stderr, "error: transfer status %d\n", xfr->status); libusb_free_transfer(xfr); exit(3); } @@ -639,6 +880,18 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) assert(xfr->user_data != nullptr); BMUSBCapture *usb = static_cast(xfr->user_data); + if (xfr->status == LIBUSB_TRANSFER_NO_DEVICE) { + if (!usb->disconnected) { + fprintf(stderr, "Device went away, stopping transfers.\n"); + usb->disconnected = true; + if (usb->card_disconnected_callback) { + usb->card_disconnected_callback(); + } + } + // Don't reschedule the transfer; the loop will stop by itself. + return; + } + if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) { if (xfr->endpoint == 0x84) { decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1)); @@ -646,7 +899,7 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); // Update the transfer with the new assumed width, if we're in the process of changing formats. - change_xfer_size_for_width(usb->assumed_frame_width, xfr); + change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr); } } if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { @@ -696,6 +949,26 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) } } +int BMUSBCapture::cb_hotplug(libusb_context *ctx, libusb_device *dev, libusb_hotplug_event event, void *user_data) +{ + if (card_connected_callback != nullptr) { + libusb_device_descriptor desc; + if (libusb_get_device_descriptor(dev, &desc) < 0) { + fprintf(stderr, "Error getting device descriptor for hotplugged device %p, killing hotplug\n", dev); + libusb_unref_device(dev); + return 1; + } + + if ((desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) || + (desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) { + card_connected_callback(dev); // Callback takes ownership. + return 0; + } + } + libusb_unref_device(dev); + return 0; +} + void BMUSBCapture::usb_thread_func() { sched_param param; @@ -704,21 +977,47 @@ void BMUSBCapture::usb_thread_func() if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) { printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); } + pthread_setname_np(pthread_self(), "bmusb_usb_drv"); while (!should_quit) { - int rc = libusb_handle_events(nullptr); + timeval sec { 1, 0 }; + int rc = libusb_handle_events_timeout(nullptr, &sec); if (rc != LIBUSB_SUCCESS) break; } } +namespace { + struct USBCardDevice { uint16_t product; uint8_t bus, port; libusb_device *device; }; -libusb_device_handle *open_card(int card_index, string *description) -{ +const char *get_product_name(uint16_t product) +{ + if (product == 0xbd3b) { + return "Intensity Shuttle"; + } else if (product == 0xbd4f) { + return "UltraStudio SDI"; + } else { + assert(false); + return nullptr; + } +} + +string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product) +{ + const char *product_name = get_product_name(product); + + char buf[256]; + snprintf(buf, sizeof(buf), "USB card %d: Bus %03u Device %03u %s", + id, bus, port, product_name); + return buf; +} + +vector find_all_cards() +{ libusb_device **devices; ssize_t num_devices = libusb_get_device_list(nullptr, &devices); if (num_devices == -1) { @@ -736,8 +1035,8 @@ libusb_device_handle *open_card(int card_index, string *description) uint8_t bus = libusb_get_bus_number(devices[i]); uint8_t port = libusb_get_port_number(devices[i]); - if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) && - !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) { + if (!(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd3b) && + !(desc.idVendor == USB_VENDOR_BLACKMAGIC && desc.idProduct == 0xbd4f)) { libusb_unref_device(devices[i]); continue; } @@ -755,23 +1054,19 @@ libusb_device_handle *open_card(int card_index, string *description) return a.port < b.port; }); - for (size_t i = 0; i < found_cards.size(); ++i) { - const char *product_name = nullptr; - if (found_cards[i].product == 0xbd3b) { - product_name = "Intensity Shuttle"; - } else if (found_cards[i].product == 0xbd4f) { - product_name = "UltraStudio SDI"; - } else { - assert(false); - } + return found_cards; +} - char buf[256]; - snprintf(buf, sizeof(buf), "Card %d: Bus %03u Device %03u %s", - int(i), found_cards[i].bus, found_cards[i].port, product_name); +libusb_device_handle *open_card(int card_index, string *description) +{ + vector found_cards = find_all_cards(); + + for (size_t i = 0; i < found_cards.size(); ++i) { + string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product); + fprintf(stderr, "%s\n", tmp_description.c_str()); if (i == size_t(card_index)) { - *description = buf; + *description = tmp_description; } - fprintf(stderr, "%s\n", buf); } if (size_t(card_index) >= found_cards.size()) { @@ -793,13 +1088,62 @@ libusb_device_handle *open_card(int card_index, string *description) return devh; } +libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string *description) +{ + uint8_t bus = libusb_get_bus_number(dev); + uint8_t port = libusb_get_port_number(dev); + + libusb_device_descriptor desc; + if (libusb_get_device_descriptor(dev, &desc) < 0) { + fprintf(stderr, "Error getting device descriptor for device %p\n", dev); + exit(1); + } + + *description = get_card_description(card_index, bus, port, desc.idProduct); + + libusb_device_handle *devh; + int rc = libusb_open(dev, &devh); + if (rc < 0) { + fprintf(stderr, "Error opening card %p: %s\n", dev, libusb_error_name(rc)); + exit(1); + } + + return devh; +} + +} // namespace + +unsigned BMUSBCapture::num_cards() +{ + int rc = libusb_init(nullptr); + if (rc < 0) { + fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc)); + exit(1); + } + + vector found_cards = find_all_cards(); + unsigned ret = found_cards.size(); + for (size_t i = 0; i < found_cards.size(); ++i) { + libusb_unref_device(found_cards[i].device); + } + return ret; +} + +void BMUSBCapture::set_pixel_format(PixelFormat pixel_format) +{ + current_pixel_format = pixel_format; + update_capture_mode(); +} + void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { - set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak. + owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); + set_video_frame_allocator(owned_video_frame_allocator.get()); } if (audio_frame_allocator == nullptr) { - set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak. + owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); + set_audio_frame_allocator(owned_audio_frame_allocator.get()); } dequeue_thread_should_quit = false; dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this); @@ -813,7 +1157,12 @@ void BMUSBCapture::configure_card() exit(1); } - libusb_device_handle *devh = open_card(card_index, &description); + if (dev == nullptr) { + devh = open_card(card_index, &description); + } else { + devh = open_card(card_index, dev, &description); + libusb_unref_device(dev); + } if (!devh) { fprintf(stderr, "Error finding USB device\n"); exit(1); @@ -863,6 +1212,11 @@ void BMUSBCapture::configure_card() rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); if (rc < 0) { fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + if (rc == LIBUSB_ERROR_NOT_FOUND) { + fprintf(stderr, "This is usually because the card came up in USB2 mode.\n"); + fprintf(stderr, "In particular, this tends to happen if you boot up with the\n"); + fprintf(stderr, "card plugged in; just unplug and replug it, and it usually works.\n"); + } exit(1); } rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2); @@ -933,6 +1287,8 @@ void BMUSBCapture::configure_card() // 0x20 - 720p?? // 0x30 - 576p?? + update_capture_mode(); + struct ctrl { int endpoint; int request; @@ -943,14 +1299,6 @@ void BMUSBCapture::configure_card() { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, - // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit - // capture (v210). - // clearing the 0x08000000 bit seems to change the capture format (other source?) - // 0x10000000 = analog audio instead of embedded audio, it seems - // 0x3a000000 = component video? (analog audio) - // 0x3c000000 = composite video? (analog audio) - // 0x3e000000 = s-video? (analog audio) - { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 }, //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start? @@ -1041,20 +1389,19 @@ void BMUSBCapture::configure_card() xfr->user_data = this; //libusb_submit_transfer(xfr); - audiofp = fopen("audio.raw", "wb"); + //audiofp = fopen("audio.raw", "wb"); // set up isochronous transfers for audio and video for (int e = 3; e <= 4; ++e) { - //int num_transfers = (e == 3) ? 6 : 6; - int num_transfers = 10; + int num_transfers = 6; for (int i = 0; i < num_transfers; ++i) { size_t buf_size; int num_iso_pack, size; if (e == 3) { // Allocate for minimum width (because that will give us the most - // number of packets, so we don't need to reallocated, but we'll + // number of packets, so we don't need to reallocate, but we'll // default to 720p for the first frame. - size = find_xfer_size_for_width(MIN_WIDTH); + size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH); num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size; buf_size = USB_VIDEO_TRANSFER_SIZE; } else { @@ -1062,8 +1409,23 @@ void BMUSBCapture::configure_card() num_iso_pack = 80; buf_size = num_iso_pack * size; } - assert(size_t(num_iso_pack * size) <= buf_size); - uint8_t *buf = new uint8_t[buf_size]; + int num_bytes = num_iso_pack * size; + assert(size_t(num_bytes) <= buf_size); +#if LIBUSB_API_VERSION >= 0x01000105 + uint8_t *buf = libusb_dev_mem_alloc(devh, num_bytes); +#else + uint8_t *buf = nullptr; +#endif + if (buf == nullptr) { + fprintf(stderr, "Failed to allocate persistent DMA memory "); +#if LIBUSB_API_VERSION >= 0x01000105 + fprintf(stderr, "(probably too old kernel; use 4.6.0 or newer).\n"); +#else + fprintf(stderr, "(compiled against too old libusb-1.0).\n"); +#endif + fprintf(stderr, "Will go slower, and likely fail due to memory fragmentation after a few hours.\n"); + buf = new uint8_t[num_bytes]; + } xfr = libusb_alloc_transfer(num_iso_pack); if (!xfr) { @@ -1078,7 +1440,7 @@ void BMUSBCapture::configure_card() xfr->user_data = this; if (e == 3) { - change_xfer_size_for_width(assumed_frame_width, xfr); + change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr); } iso_xfrs.push_back(xfr); @@ -1120,6 +1482,18 @@ void BMUSBCapture::stop_dequeue_thread() void BMUSBCapture::start_bm_thread() { + // Devices leaving are discovered by seeing the isochronous packets + // coming back with errors, so only care about devices joining. + if (card_connected_callback != nullptr) { + if (libusb_hotplug_register_callback( + nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS, + USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY, + &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) { + fprintf(stderr, "libusb_hotplug_register_callback() failed\n"); + exit(1); + } + } + should_quit = false; usb_thread = thread(&BMUSBCapture::usb_thread_func); } @@ -1127,120 +1501,81 @@ void BMUSBCapture::start_bm_thread() void BMUSBCapture::stop_bm_thread() { should_quit = true; + libusb_interrupt_event_handler(nullptr); usb_thread.join(); } -struct VideoFormatEntry { - uint16_t normalized_video_format; - unsigned width, height, second_field_start; - unsigned extra_lines_top, extra_lines_bottom; - unsigned frame_rate_nom, frame_rate_den; - bool interlaced; -}; +map BMUSBCapture::get_available_video_modes() const +{ + // The USB3 cards autodetect, and seem to have no provision for forcing modes. + VideoMode auto_mode; + auto_mode.name = "Autodetect"; + auto_mode.autodetect = true; + return {{ 0, auto_mode }}; +} -bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format) +uint32_t BMUSBCapture::get_current_video_mode() const { - decoded_video_format->id = video_format; - decoded_video_format->interlaced = false; + return 0; // Matches get_available_video_modes(). +} - // TODO: Add these for all formats as we find them. - decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0; +void BMUSBCapture::set_video_mode(uint32_t video_mode_id) +{ + assert(video_mode_id == 0); // Matches get_available_video_modes(). +} - if (video_format == 0x0800) { - // No video signal. These green pseudo-frames seem to come at about 30.13 Hz. - // It's a strange thing, but what can you do. - decoded_video_format->width = 720; - decoded_video_format->height = 525; - decoded_video_format->extra_lines_top = 0; - decoded_video_format->extra_lines_bottom = 0; - decoded_video_format->frame_rate_nom = 3013; - decoded_video_format->frame_rate_den = 100; - decoded_video_format->has_signal = false; - return true; - } - if ((video_format & 0xe800) != 0xe800) { - printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", - video_format); - decoded_video_format->width = 0; - decoded_video_format->height = 0; - decoded_video_format->extra_lines_top = 0; - decoded_video_format->extra_lines_bottom = 0; - decoded_video_format->frame_rate_nom = 60; - decoded_video_format->frame_rate_den = 1; - decoded_video_format->has_signal = false; - return false; - } +std::map BMUSBCapture::get_available_video_inputs() const +{ + return { + { 0x00000000, "HDMI/SDI" }, + { 0x02000000, "Component" }, + { 0x04000000, "Composite" }, + { 0x06000000, "S-video" } + }; +} - decoded_video_format->has_signal = true; +void BMUSBCapture::set_video_input(uint32_t video_input_id) +{ + assert((video_input_id & ~0x06000000) == 0); + current_video_input = video_input_id; + update_capture_mode(); +} - // NTSC (480i59.94, I suppose). A special case, see below. - if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) { - decoded_video_format->width = 720; - decoded_video_format->height = 480; - decoded_video_format->extra_lines_top = 17; - decoded_video_format->extra_lines_bottom = 28; - decoded_video_format->frame_rate_nom = 30000; - decoded_video_format->frame_rate_den = 1001; - decoded_video_format->second_field_start = 280; - decoded_video_format->interlaced = true; - return true; - } +std::map BMUSBCapture::get_available_audio_inputs() const +{ + return { + { 0x00000000, "Embedded" }, + { 0x10000000, "Analog" } + }; +} - // PAL (576i50, I suppose). A special case, see below. - if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) { - decoded_video_format->width = 720; - decoded_video_format->height = 576; - decoded_video_format->extra_lines_top = 22; - decoded_video_format->extra_lines_bottom = 27; - decoded_video_format->frame_rate_nom = 25; - decoded_video_format->frame_rate_den = 1; - decoded_video_format->second_field_start = 335; - decoded_video_format->interlaced = true; - return true; +void BMUSBCapture::set_audio_input(uint32_t audio_input_id) +{ + assert((audio_input_id & ~0x10000000) == 0); + current_audio_input = audio_input_id; + update_capture_mode(); +} + +void BMUSBCapture::update_capture_mode() +{ + if (devh == nullptr) { + return; } - // 0x8 seems to be a flag about availability of deep color on the input, - // except when it's not (e.g. it's the only difference between NTSC - // and PAL). Rather confusing. But we clear it here nevertheless, because - // usually it doesn't mean anything. - // - // 0x4 is a flag I've only seen from the D4. I don't know what it is. - uint16_t normalized_video_format = video_format & ~0xe80c; - constexpr VideoFormatEntry entries[] = { - { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). - { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. - { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). - { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. - { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. - { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. - { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. - { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30. - { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60. - { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97. - { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94. - { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25. - { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50. - { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24. - { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98. - }; - for (const VideoFormatEntry &entry : entries) { - if (normalized_video_format == entry.normalized_video_format) { - decoded_video_format->width = entry.width; - decoded_video_format->height = entry.height; - decoded_video_format->second_field_start = entry.second_field_start; - decoded_video_format->extra_lines_top = entry.extra_lines_top; - decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; - decoded_video_format->frame_rate_nom = entry.frame_rate_nom; - decoded_video_format->frame_rate_den = entry.frame_rate_den; - decoded_video_format->interlaced = entry.interlaced; - return true; - } + // Clearing the 0x08000000 bit seems to change the capture format (other source?). + uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input); + if (current_pixel_format == PixelFormat_8BitYCbCr) { + mode |= htonl(0x20000000); + } else { + assert(current_pixel_format == PixelFormat_10BitYCbCr); } - printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format); - decoded_video_format->width = 1280; - decoded_video_format->height = 720; - decoded_video_format->frame_rate_nom = 60; - decoded_video_format->frame_rate_den = 1; - return false; + int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT, + /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0); + if (rc < 0) { + fprintf(stderr, "Error on setting mode: %s\n", libusb_error_name(rc)); + exit(1); + } } + +} // namespace bmusb