X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bmusb.cpp;h=cd4e59c4302b5d6cdba09080cd4b36c7337e7c2a;hb=d51fd416b5c2784b03ae50bda59cd01a881bf7df;hp=e89610e7a5f2d7cc28a34ed1a22f326a5221bca5;hpb=ec5eaa5e319e91f89db15dcbccdf64a056f710ae;p=bmusb diff --git a/bmusb.cpp b/bmusb.cpp index e89610e..cd4e59c 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,9 +1,13 @@ -// Intensity Shuttle USB3 prototype capture driver, v0.3 +// Intensity Shuttle USB3 capture driver, v0.4 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) +#define HAS_MULTIVERSIONING 1 +#endif + #include #include #include @@ -14,10 +18,10 @@ #include #include #include -#ifdef __SSE4_1__ +#if HAS_MULTIVERSIONING #include #endif -#include "bmusb.h" +#include "bmusb/bmusb.h" #include #include @@ -44,7 +48,10 @@ using namespace std::placeholders; #define FRAME_SIZE (8 << 20) // 8 MB. #define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB. +namespace bmusb { + card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr; +bool BMUSBCapture::hotplug_existing_devices = false; namespace { @@ -80,6 +87,122 @@ void change_xfer_size_for_width(int width, libusb_transfer *xfr) } } +struct VideoFormatEntry { + uint16_t normalized_video_format; + unsigned width, height, second_field_start; + unsigned extra_lines_top, extra_lines_bottom; + unsigned frame_rate_nom, frame_rate_den; + bool interlaced; +}; + +// Get details for the given video format; returns false if detection was incomplete. +bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format) +{ + decoded_video_format->id = video_format; + decoded_video_format->interlaced = false; + + // TODO: Add these for all formats as we find them. + decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0; + + if (video_format == 0x0800) { + // No video signal. These green pseudo-frames seem to come at about 30.13 Hz. + // It's a strange thing, but what can you do. + decoded_video_format->width = 720; + decoded_video_format->height = 525; + decoded_video_format->extra_lines_top = 0; + decoded_video_format->extra_lines_bottom = 0; + decoded_video_format->frame_rate_nom = 3013; + decoded_video_format->frame_rate_den = 100; + decoded_video_format->has_signal = false; + return true; + } + if ((video_format & 0xe800) != 0xe800) { + printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", + video_format); + decoded_video_format->width = 0; + decoded_video_format->height = 0; + decoded_video_format->extra_lines_top = 0; + decoded_video_format->extra_lines_bottom = 0; + decoded_video_format->frame_rate_nom = 60; + decoded_video_format->frame_rate_den = 1; + decoded_video_format->has_signal = false; + return false; + } + + decoded_video_format->has_signal = true; + + // NTSC (480i59.94, I suppose). A special case, see below. + if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) { + decoded_video_format->width = 720; + decoded_video_format->height = 480; + decoded_video_format->extra_lines_top = 17; + decoded_video_format->extra_lines_bottom = 28; + decoded_video_format->frame_rate_nom = 30000; + decoded_video_format->frame_rate_den = 1001; + decoded_video_format->second_field_start = 280; + decoded_video_format->interlaced = true; + return true; + } + + // PAL (576i50, I suppose). A special case, see below. + if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) { + decoded_video_format->width = 720; + decoded_video_format->height = 576; + decoded_video_format->extra_lines_top = 22; + decoded_video_format->extra_lines_bottom = 27; + decoded_video_format->frame_rate_nom = 25; + decoded_video_format->frame_rate_den = 1; + decoded_video_format->second_field_start = 335; + decoded_video_format->interlaced = true; + return true; + } + + // 0x8 seems to be a flag about availability of deep color on the input, + // except when it's not (e.g. it's the only difference between NTSC + // and PAL). Rather confusing. But we clear it here nevertheless, because + // usually it doesn't mean anything. + // + // 0x4 is a flag I've only seen from the D4. I don't know what it is. + uint16_t normalized_video_format = video_format & ~0xe80c; + constexpr VideoFormatEntry entries[] = { + { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). + { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). + { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. + { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. + { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. + { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30. + { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60. + { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97. + { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94. + { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25. + { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50. + { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24. + { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98. + }; + for (const VideoFormatEntry &entry : entries) { + if (normalized_video_format == entry.normalized_video_format) { + decoded_video_format->width = entry.width; + decoded_video_format->height = entry.height; + decoded_video_format->second_field_start = entry.second_field_start; + decoded_video_format->extra_lines_top = entry.extra_lines_top; + decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; + decoded_video_format->frame_rate_nom = entry.frame_rate_nom; + decoded_video_format->frame_rate_den = entry.frame_rate_den; + decoded_video_format->interlaced = entry.interlaced; + return true; + } + } + + printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format); + decoded_video_format->width = 1280; + decoded_video_format->height = 720; + decoded_video_format->frame_rate_nom = 60; + decoded_video_format->frame_rate_den = 1; + return false; +} + } // namespace FrameAllocator::~FrameAllocator() {} @@ -360,8 +483,6 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } } -#ifdef __SSE4_1__ - #if 0 void avx2_dump(const char *name, __m256i n) { @@ -405,6 +526,18 @@ void avx2_dump(const char *name, __m256i n) } #endif +#ifndef HAS_MULTIVERSIONING + +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + // No fast path possible unless we have multiversioning. + return start; +} + +#else // defined(HAS_MULTIVERSIONING) + +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char); + // Does a memcpy and memchr in one to reduce processing time. // Note that the benefit is somewhat limited if your L3 cache is small, // as you'll (unfortunately) spend most of the time loading the data @@ -414,6 +547,14 @@ void avx2_dump(const char *name, __m256i n) // up until the first instance of "sync_char" (usually a bit before, actually). // This is fine, since 0x00 bytes shouldn't really show up in normal picture // data, and what we really need this for is the 00 00 ff ff marker in video data. +__attribute__((target("default"))) +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + // No fast path possible unless we have SSE 4.1 or higher. + return start; +} + +__attribute__((target("sse4.1", "avx2"))) const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) { if (current_frame->data == nullptr || @@ -457,7 +598,12 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const assert(((limit - aligned_start) % 64) == 0); } -#if __AVX2__ + return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char); +} + +__attribute__((target("avx2"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char) +{ const __m256i needle = _mm256_set1_epi8(sync_char); const __restrict __m256i *in = (const __m256i *)aligned_start; @@ -516,7 +662,14 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const } current_frame->len = (uint8_t *)out - current_frame->data; } -#else + + //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); + return (const uint8_t *)in; +} + +__attribute__((target("sse4.1"))) +const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char) +{ const __m128i needle = _mm_set1_epi8(sync_char); const __m128i *in = (const __m128i *)aligned_start; @@ -566,13 +719,12 @@ const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const } current_frame->len = (uint8_t *)out - current_frame->data; } -#endif //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); - return (const uint8_t *)in; } -#endif + +#endif // defined(HAS_MULTIVERSIONING) void decode_packs(const libusb_transfer *xfr, const char *sync_pattern, @@ -594,11 +746,9 @@ void decode_packs(const libusb_transfer *xfr, const uint8_t *start = xfr->buffer + offset; const uint8_t *limit = start + pack->actual_length; while (start < limit) { // Usually runs only one iteration. -#ifdef __SSE4_1__ start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); if (start == limit) break; assert(start < limit); -#endif const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length); if (start_next_frame == nullptr) { @@ -728,12 +878,15 @@ void BMUSBCapture::usb_thread_func() printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); } while (!should_quit) { - int rc = libusb_handle_events(nullptr); + timeval sec { 1, 0 }; + int rc = libusb_handle_events_timeout(nullptr, &sec); if (rc != LIBUSB_SUCCESS) break; } } +namespace { + struct USBCardDevice { uint16_t product; uint8_t bus, port; @@ -762,7 +915,7 @@ string get_card_description(int id, uint8_t bus, uint8_t port, uint16_t product) return buf; } -libusb_device_handle *open_card(int card_index, string *description) +vector find_all_cards() { libusb_device **devices; ssize_t num_devices = libusb_get_device_list(nullptr, &devices); @@ -800,6 +953,13 @@ libusb_device_handle *open_card(int card_index, string *description) return a.port < b.port; }); + return found_cards; +} + +libusb_device_handle *open_card(int card_index, string *description) +{ + vector found_cards = find_all_cards(); + for (size_t i = 0; i < found_cards.size(); ++i) { string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product); fprintf(stderr, "%s\n", tmp_description.c_str()); @@ -850,6 +1010,24 @@ libusb_device_handle *open_card(unsigned card_index, libusb_device *dev, string return devh; } +} // namespace + +unsigned BMUSBCapture::num_cards() +{ + int rc = libusb_init(nullptr); + if (rc < 0) { + fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc)); + exit(1); + } + + vector found_cards = find_all_cards(); + unsigned ret = found_cards.size(); + for (size_t i = 0; i < found_cards.size(); ++i) { + libusb_unref_device(found_cards[i].device); + } + return ret; +} + void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { @@ -1197,7 +1375,7 @@ void BMUSBCapture::start_bm_thread() // coming back with errors, so only care about devices joining. if (card_connected_callback != nullptr) { if (libusb_hotplug_register_callback( - nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, LIBUSB_HOTPLUG_NO_FLAGS, + nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS, USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY, &BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) { fprintf(stderr, "libusb_hotplug_register_callback() failed\n"); @@ -1215,121 +1393,6 @@ void BMUSBCapture::stop_bm_thread() usb_thread.join(); } -struct VideoFormatEntry { - uint16_t normalized_video_format; - unsigned width, height, second_field_start; - unsigned extra_lines_top, extra_lines_bottom; - unsigned frame_rate_nom, frame_rate_den; - bool interlaced; -}; - -bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format) -{ - decoded_video_format->id = video_format; - decoded_video_format->interlaced = false; - - // TODO: Add these for all formats as we find them. - decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0; - - if (video_format == 0x0800) { - // No video signal. These green pseudo-frames seem to come at about 30.13 Hz. - // It's a strange thing, but what can you do. - decoded_video_format->width = 720; - decoded_video_format->height = 525; - decoded_video_format->extra_lines_top = 0; - decoded_video_format->extra_lines_bottom = 0; - decoded_video_format->frame_rate_nom = 3013; - decoded_video_format->frame_rate_den = 100; - decoded_video_format->has_signal = false; - return true; - } - if ((video_format & 0xe800) != 0xe800) { - printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", - video_format); - decoded_video_format->width = 0; - decoded_video_format->height = 0; - decoded_video_format->extra_lines_top = 0; - decoded_video_format->extra_lines_bottom = 0; - decoded_video_format->frame_rate_nom = 60; - decoded_video_format->frame_rate_den = 1; - decoded_video_format->has_signal = false; - return false; - } - - decoded_video_format->has_signal = true; - - // NTSC (480i59.94, I suppose). A special case, see below. - if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) { - decoded_video_format->width = 720; - decoded_video_format->height = 480; - decoded_video_format->extra_lines_top = 17; - decoded_video_format->extra_lines_bottom = 28; - decoded_video_format->frame_rate_nom = 30000; - decoded_video_format->frame_rate_den = 1001; - decoded_video_format->second_field_start = 280; - decoded_video_format->interlaced = true; - return true; - } - - // PAL (576i50, I suppose). A special case, see below. - if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) { - decoded_video_format->width = 720; - decoded_video_format->height = 576; - decoded_video_format->extra_lines_top = 22; - decoded_video_format->extra_lines_bottom = 27; - decoded_video_format->frame_rate_nom = 25; - decoded_video_format->frame_rate_den = 1; - decoded_video_format->second_field_start = 335; - decoded_video_format->interlaced = true; - return true; - } - - // 0x8 seems to be a flag about availability of deep color on the input, - // except when it's not (e.g. it's the only difference between NTSC - // and PAL). Rather confusing. But we clear it here nevertheless, because - // usually it doesn't mean anything. - // - // 0x4 is a flag I've only seen from the D4. I don't know what it is. - uint16_t normalized_video_format = video_format & ~0xe80c; - constexpr VideoFormatEntry entries[] = { - { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). - { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. - { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). - { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. - { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. - { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. - { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. - { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30. - { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60. - { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97. - { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94. - { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25. - { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50. - { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24. - { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98. - }; - for (const VideoFormatEntry &entry : entries) { - if (normalized_video_format == entry.normalized_video_format) { - decoded_video_format->width = entry.width; - decoded_video_format->height = entry.height; - decoded_video_format->second_field_start = entry.second_field_start; - decoded_video_format->extra_lines_top = entry.extra_lines_top; - decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; - decoded_video_format->frame_rate_nom = entry.frame_rate_nom; - decoded_video_format->frame_rate_den = entry.frame_rate_den; - decoded_video_format->interlaced = entry.interlaced; - return true; - } - } - - printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format); - decoded_video_format->width = 1280; - decoded_video_format->height = 720; - decoded_video_format->frame_rate_nom = 60; - decoded_video_format->frame_rate_den = 1; - return false; -} - map BMUSBCapture::get_available_video_modes() const { // The USB3 cards autodetect, and seem to have no provision for forcing modes. @@ -1394,3 +1457,5 @@ void BMUSBCapture::update_capture_mode() exit(1); } } + +} // namespace bmusb