-// Intensity Shuttle USB3 prototype capture driver, v0.3
+// Intensity Shuttle USB3 capture driver, v0.5
// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
// (can do captures for hours at a time with no drops), except during startup
// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
// Audio comes out as 8-channel 24-bit raw audio.
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
+#define HAS_MULTIVERSIONING 1
+#endif
+
#include <assert.h>
#include <errno.h>
#include <libusb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef __SSE4_1__
+#if HAS_MULTIVERSIONING
#include <immintrin.h>
#endif
-#include "bmusb.h"
+#include "bmusb/bmusb.h"
#include <algorithm>
#include <atomic>
#define FRAME_SIZE (8 << 20) // 8 MB.
#define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB.
+namespace bmusb {
+
card_connected_callback_t BMUSBCapture::card_connected_callback = nullptr;
+bool BMUSBCapture::hotplug_existing_devices = false;
namespace {
}
}
+struct VideoFormatEntry {
+ uint16_t normalized_video_format;
+ unsigned width, height, second_field_start;
+ unsigned extra_lines_top, extra_lines_bottom;
+ unsigned frame_rate_nom, frame_rate_den;
+ bool interlaced;
+};
+
+// Get details for the given video format; returns false if detection was incomplete.
+bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
+{
+ decoded_video_format->id = video_format;
+ decoded_video_format->interlaced = false;
+
+ // TODO: Add these for all formats as we find them.
+ decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
+
+ if (video_format == 0x0800) {
+ // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
+ // It's a strange thing, but what can you do.
+ decoded_video_format->width = 720;
+ decoded_video_format->height = 525;
+ decoded_video_format->extra_lines_top = 0;
+ decoded_video_format->extra_lines_bottom = 0;
+ decoded_video_format->frame_rate_nom = 3013;
+ decoded_video_format->frame_rate_den = 100;
+ decoded_video_format->has_signal = false;
+ return true;
+ }
+ if ((video_format & 0xe800) != 0xe800) {
+ printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
+ video_format);
+ decoded_video_format->width = 0;
+ decoded_video_format->height = 0;
+ decoded_video_format->extra_lines_top = 0;
+ decoded_video_format->extra_lines_bottom = 0;
+ decoded_video_format->frame_rate_nom = 60;
+ decoded_video_format->frame_rate_den = 1;
+ decoded_video_format->has_signal = false;
+ return false;
+ }
+
+ decoded_video_format->has_signal = true;
+
+ // NTSC (480i59.94, I suppose). A special case, see below.
+ if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
+ decoded_video_format->width = 720;
+ decoded_video_format->height = 480;
+ decoded_video_format->extra_lines_top = 17;
+ decoded_video_format->extra_lines_bottom = 28;
+ decoded_video_format->frame_rate_nom = 30000;
+ decoded_video_format->frame_rate_den = 1001;
+ decoded_video_format->second_field_start = 280;
+ decoded_video_format->interlaced = true;
+ return true;
+ }
+
+ // PAL (576i50, I suppose). A special case, see below.
+ if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
+ decoded_video_format->width = 720;
+ decoded_video_format->height = 576;
+ decoded_video_format->extra_lines_top = 22;
+ decoded_video_format->extra_lines_bottom = 27;
+ decoded_video_format->frame_rate_nom = 25;
+ decoded_video_format->frame_rate_den = 1;
+ decoded_video_format->second_field_start = 335;
+ decoded_video_format->interlaced = true;
+ return true;
+ }
+
+ // 0x8 seems to be a flag about availability of deep color on the input,
+ // except when it's not (e.g. it's the only difference between NTSC
+ // and PAL). Rather confusing. But we clear it here nevertheless, because
+ // usually it doesn't mean anything.
+ //
+ // 0x4 is a flag I've only seen from the D4. I don't know what it is.
+ uint16_t normalized_video_format = video_format & ~0xe80c;
+ constexpr VideoFormatEntry entries[] = {
+ { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
+ { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
+ { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
+ { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
+ { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
+ { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
+ { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
+ { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
+ { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
+ { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
+ { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
+ { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
+ { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50.
+ { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
+ { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
+ };
+ for (const VideoFormatEntry &entry : entries) {
+ if (normalized_video_format == entry.normalized_video_format) {
+ decoded_video_format->width = entry.width;
+ decoded_video_format->height = entry.height;
+ decoded_video_format->second_field_start = entry.second_field_start;
+ decoded_video_format->extra_lines_top = entry.extra_lines_top;
+ decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
+ decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
+ decoded_video_format->frame_rate_den = entry.frame_rate_den;
+ decoded_video_format->interlaced = entry.interlaced;
+ return true;
+ }
+ }
+
+ printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
+ decoded_video_format->width = 1280;
+ decoded_video_format->height = 720;
+ decoded_video_format->frame_rate_nom = 60;
+ decoded_video_format->frame_rate_den = 1;
+ return false;
+}
+
} // namespace
FrameAllocator::~FrameAllocator() {}
}
}
-#ifdef __SSE4_1__
-
#if 0
void avx2_dump(const char *name, __m256i n)
{
}
#endif
+#ifndef HAS_MULTIVERSIONING
+
+const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
+{
+ // No fast path possible unless we have multiversioning.
+ return start;
+}
+
+#else // defined(HAS_MULTIVERSIONING)
+
+const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char);
+
// Does a memcpy and memchr in one to reduce processing time.
// Note that the benefit is somewhat limited if your L3 cache is small,
// as you'll (unfortunately) spend most of the time loading the data
// up until the first instance of "sync_char" (usually a bit before, actually).
// This is fine, since 0x00 bytes shouldn't really show up in normal picture
// data, and what we really need this for is the 00 00 ff ff marker in video data.
+__attribute__((target("default")))
+const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
+{
+ // No fast path possible unless we have SSE 4.1 or higher.
+ return start;
+}
+
+__attribute__((target("sse4.1", "avx2")))
const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
{
if (current_frame->data == nullptr ||
assert(((limit - aligned_start) % 64) == 0);
}
-#if __AVX2__
+ return add_to_frame_fastpath_core(current_frame, aligned_start, limit, sync_char);
+}
+
+__attribute__((target("avx2")))
+const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
+{
const __m256i needle = _mm256_set1_epi8(sync_char);
const __restrict __m256i *in = (const __m256i *)aligned_start;
}
current_frame->len = (uint8_t *)out - current_frame->data;
}
-#else
+
+ //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
+ return (const uint8_t *)in;
+}
+
+__attribute__((target("sse4.1")))
+const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const uint8_t *aligned_start, const uint8_t *limit, const char sync_char)
+{
const __m128i needle = _mm_set1_epi8(sync_char);
const __m128i *in = (const __m128i *)aligned_start;
}
current_frame->len = (uint8_t *)out - current_frame->data;
}
-#endif
//printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
-
return (const uint8_t *)in;
}
-#endif
+
+#endif // defined(HAS_MULTIVERSIONING)
void decode_packs(const libusb_transfer *xfr,
const char *sync_pattern,
const uint8_t *start = xfr->buffer + offset;
const uint8_t *limit = start + pack->actual_length;
while (start < limit) { // Usually runs only one iteration.
-#ifdef __SSE4_1__
start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
if (start == limit) break;
assert(start < limit);
-#endif
const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
if (start_next_frame == nullptr) {
printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
}
while (!should_quit) {
- int rc = libusb_handle_events(nullptr);
+ timeval sec { 1, 0 };
+ int rc = libusb_handle_events_timeout(nullptr, &sec);
if (rc != LIBUSB_SUCCESS)
break;
}
}
+namespace {
+
struct USBCardDevice {
uint16_t product;
uint8_t bus, port;
return buf;
}
-libusb_device_handle *open_card(int card_index, string *description)
+vector<USBCardDevice> find_all_cards()
{
libusb_device **devices;
ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
return a.port < b.port;
});
+ return found_cards;
+}
+
+libusb_device_handle *open_card(int card_index, string *description)
+{
+ vector<USBCardDevice> found_cards = find_all_cards();
+
for (size_t i = 0; i < found_cards.size(); ++i) {
string tmp_description = get_card_description(i, found_cards[i].bus, found_cards[i].port, found_cards[i].product);
fprintf(stderr, "%s\n", tmp_description.c_str());
return devh;
}
+} // namespace
+
+unsigned BMUSBCapture::num_cards()
+{
+ int rc = libusb_init(nullptr);
+ if (rc < 0) {
+ fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
+ exit(1);
+ }
+
+ vector<USBCardDevice> found_cards = find_all_cards();
+ unsigned ret = found_cards.size();
+ for (size_t i = 0; i < found_cards.size(); ++i) {
+ libusb_unref_device(found_cards[i].device);
+ }
+ return ret;
+}
+
void BMUSBCapture::configure_card()
{
if (video_frame_allocator == nullptr) {
// coming back with errors, so only care about devices joining.
if (card_connected_callback != nullptr) {
if (libusb_hotplug_register_callback(
- nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, LIBUSB_HOTPLUG_NO_FLAGS,
+ nullptr, LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED, hotplug_existing_devices ? LIBUSB_HOTPLUG_ENUMERATE : LIBUSB_HOTPLUG_NO_FLAGS,
USB_VENDOR_BLACKMAGIC, LIBUSB_HOTPLUG_MATCH_ANY, LIBUSB_HOTPLUG_MATCH_ANY,
&BMUSBCapture::cb_hotplug, nullptr, nullptr) < 0) {
fprintf(stderr, "libusb_hotplug_register_callback() failed\n");
usb_thread.join();
}
-struct VideoFormatEntry {
- uint16_t normalized_video_format;
- unsigned width, height, second_field_start;
- unsigned extra_lines_top, extra_lines_bottom;
- unsigned frame_rate_nom, frame_rate_den;
- bool interlaced;
-};
-
-bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_format)
-{
- decoded_video_format->id = video_format;
- decoded_video_format->interlaced = false;
-
- // TODO: Add these for all formats as we find them.
- decoded_video_format->extra_lines_top = decoded_video_format->extra_lines_bottom = decoded_video_format->second_field_start = 0;
-
- if (video_format == 0x0800) {
- // No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
- // It's a strange thing, but what can you do.
- decoded_video_format->width = 720;
- decoded_video_format->height = 525;
- decoded_video_format->extra_lines_top = 0;
- decoded_video_format->extra_lines_bottom = 0;
- decoded_video_format->frame_rate_nom = 3013;
- decoded_video_format->frame_rate_den = 100;
- decoded_video_format->has_signal = false;
- return true;
- }
- if ((video_format & 0xe800) != 0xe800) {
- printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
- video_format);
- decoded_video_format->width = 0;
- decoded_video_format->height = 0;
- decoded_video_format->extra_lines_top = 0;
- decoded_video_format->extra_lines_bottom = 0;
- decoded_video_format->frame_rate_nom = 60;
- decoded_video_format->frame_rate_den = 1;
- decoded_video_format->has_signal = false;
- return false;
- }
-
- decoded_video_format->has_signal = true;
-
- // NTSC (480i59.94, I suppose). A special case, see below.
- if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
- decoded_video_format->width = 720;
- decoded_video_format->height = 480;
- decoded_video_format->extra_lines_top = 17;
- decoded_video_format->extra_lines_bottom = 28;
- decoded_video_format->frame_rate_nom = 30000;
- decoded_video_format->frame_rate_den = 1001;
- decoded_video_format->second_field_start = 280;
- decoded_video_format->interlaced = true;
- return true;
- }
-
- // PAL (576i50, I suppose). A special case, see below.
- if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
- decoded_video_format->width = 720;
- decoded_video_format->height = 576;
- decoded_video_format->extra_lines_top = 22;
- decoded_video_format->extra_lines_bottom = 27;
- decoded_video_format->frame_rate_nom = 25;
- decoded_video_format->frame_rate_den = 1;
- decoded_video_format->second_field_start = 335;
- decoded_video_format->interlaced = true;
- return true;
- }
-
- // 0x8 seems to be a flag about availability of deep color on the input,
- // except when it's not (e.g. it's the only difference between NTSC
- // and PAL). Rather confusing. But we clear it here nevertheless, because
- // usually it doesn't mean anything.
- //
- // 0x4 is a flag I've only seen from the D4. I don't know what it is.
- uint16_t normalized_video_format = video_format & ~0xe80c;
- constexpr VideoFormatEntry entries[] = {
- { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
- { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
- { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
- { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
- { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
- { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
- { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
- { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
- { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
- { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
- { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
- { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
- { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50.
- { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
- { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
- };
- for (const VideoFormatEntry &entry : entries) {
- if (normalized_video_format == entry.normalized_video_format) {
- decoded_video_format->width = entry.width;
- decoded_video_format->height = entry.height;
- decoded_video_format->second_field_start = entry.second_field_start;
- decoded_video_format->extra_lines_top = entry.extra_lines_top;
- decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
- decoded_video_format->frame_rate_nom = entry.frame_rate_nom;
- decoded_video_format->frame_rate_den = entry.frame_rate_den;
- decoded_video_format->interlaced = entry.interlaced;
- return true;
- }
- }
-
- printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
- decoded_video_format->width = 1280;
- decoded_video_format->height = 720;
- decoded_video_format->frame_rate_nom = 60;
- decoded_video_format->frame_rate_den = 1;
- return false;
-}
-
map<uint32_t, VideoMode> BMUSBCapture::get_available_video_modes() const
{
// The USB3 cards autodetect, and seem to have no provision for forcing modes.
exit(1);
}
}
+
+} // namespace bmusb