-// Intensity Shuttle USB3 capture driver, v0.5.2
-// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
+// Intensity Shuttle USB3 capture driver, v0.7.5
+// Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
// (can do captures for hours at a time with no drops), except during startup
// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
// Audio comes out as 8-channel 24-bit raw audio.
#include <libusb.h>
#include <unistd.h>
#include <netinet/in.h>
+#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
thread usb_thread;
atomic<bool> should_quit;
-int find_xfer_size_for_width(int width)
+// Returns the number of bytes taken up by one line of <width> pixels in the
+// packed 10-bit format: Pixels are counted in groups of six (a partial group
+// at the end of the line counts as a whole one), and every group occupies
+// four 32-bit words.
+int v210_stride(int width)
+{
+	const int num_groups = (width + 5) / 6;
+	return num_groups * 4 * sizeof(uint32_t);
+}
+
+int find_xfer_size_for_width(PixelFormat pixel_format, int width)
{
// Video seems to require isochronous packets scaled with the width;
// seemingly six lines is about right, rounded up to the required 1kB
// multiple.
- int size = width * 2 * 6;
// Note that for 10-bit input, you'll need to increase size accordingly.
- //size = size * 4 / 3;
+ int stride;
+ if (pixel_format == PixelFormat_10BitYCbCr) {
+ stride = v210_stride(width);
+ } else {
+ stride = width * sizeof(uint16_t);
+ }
+ int size = stride * 6;
if (size % 1024 != 0) {
size &= ~1023;
size += 1024;
return size;
}
-void change_xfer_size_for_width(int width, libusb_transfer *xfr)
+void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr)
{
assert(width >= MIN_WIDTH);
- size_t size = find_xfer_size_for_width(width);
+ size_t size = find_xfer_size_for_width(pixel_format, width);
int num_iso_pack = xfr->length / size;
if (num_iso_pack != xfr->num_iso_packets ||
size != xfr->iso_packet_desc[0].length) {
// It's a strange thing, but what can you do.
decoded_video_format->width = 720;
decoded_video_format->height = 525;
+ decoded_video_format->stride = 720 * 2;
decoded_video_format->extra_lines_top = 0;
decoded_video_format->extra_lines_bottom = 0;
decoded_video_format->frame_rate_nom = 3013;
decoded_video_format->has_signal = false;
return true;
}
- if ((video_format & 0xe800) != 0xe800) {
+ if ((video_format & 0xe000) != 0xe000) {
printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
video_format);
decoded_video_format->width = 0;
decoded_video_format->height = 0;
+ decoded_video_format->stride = 0;
decoded_video_format->extra_lines_top = 0;
decoded_video_format->extra_lines_bottom = 0;
decoded_video_format->frame_rate_nom = 60;
decoded_video_format->has_signal = true;
// NTSC (480i59.94, I suppose). A special case, see below.
- if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
+ if ((video_format & ~0x0800) == 0xe101 ||
+ (video_format & ~0x0800) == 0xe1c1 ||
+ (video_format & ~0x0800) == 0xe001) {
decoded_video_format->width = 720;
decoded_video_format->height = 480;
+ if (video_format & 0x0800) {
+ decoded_video_format->stride = 720 * 2;
+ } else {
+ decoded_video_format->stride = v210_stride(720);
+ }
decoded_video_format->extra_lines_top = 17;
decoded_video_format->extra_lines_bottom = 28;
decoded_video_format->frame_rate_nom = 30000;
}
// PAL (576i50, I suppose). A special case, see below.
- if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) {
+ if ((video_format & ~0x0800) == 0xe109 ||
+ (video_format & ~0x0800) == 0xe1c9 ||
+ (video_format & ~0x0800) == 0xe009 ||
+ (video_format & ~0x0800) == 0xe3e9 ||
+ (video_format & ~0x0800) == 0xe3e1) {
decoded_video_format->width = 720;
decoded_video_format->height = 576;
+ if (video_format & 0x0800) {
+ decoded_video_format->stride = 720 * 2;
+ } else {
+ decoded_video_format->stride = v210_stride(720);
+ }
decoded_video_format->extra_lines_top = 22;
decoded_video_format->extra_lines_bottom = 27;
decoded_video_format->frame_rate_nom = 25;
// 0x8 seems to be a flag about availability of deep color on the input,
// except when it's not (e.g. it's the only difference between NTSC
// and PAL). Rather confusing. But we clear it here nevertheless, because
- // usually it doesn't mean anything.
+ // usually it doesn't mean anything. 0x0800 appears to be 8-bit input
+ // (as opposed to 10-bit).
//
// 0x4 is a flag I've only seen from the D4. I don't know what it is.
uint16_t normalized_video_format = video_format & ~0xe80c;
constexpr VideoFormatEntry entries[] = {
{ 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed).
{ 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
+ { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50.
{ 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4).
{ 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
+ { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50.
{ 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
{ 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60.
{ 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94.
- { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30.
+ { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30.
{ 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60.
- { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97.
+ { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97.
{ 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94.
- { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25.
- { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50.
- { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24.
- { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98.
+ { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25.
+ { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50.
+ { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24.
+ { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98.
};
for (const VideoFormatEntry &entry : entries) {
if (normalized_video_format == entry.normalized_video_format) {
decoded_video_format->width = entry.width;
decoded_video_format->height = entry.height;
+ if (video_format & 0x0800) {
+ decoded_video_format->stride = entry.width * 2;
+ } else {
+ decoded_video_format->stride = v210_stride(entry.width);
+ }
decoded_video_format->second_field_start = entry.second_field_start;
decoded_video_format->extra_lines_top = entry.extra_lines_top;
decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom;
printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format);
decoded_video_format->width = 1280;
decoded_video_format->height = 720;
+ decoded_video_format->stride = 1280 * 2;
decoded_video_format->frame_rate_nom = 60;
decoded_video_format->frame_rate_den = 1;
return false;
}
+// There are seemingly no direct indicators of sample rate; you just get
+// one frame's worth and have to guess from that.
+//
+// <len> is the number of bytes of audio that came with one video frame; it is
+// counted as 3-byte samples across 8 channels, and scaled by the frame rate
+// in <video_format> to get an estimate in Hz. Returns the candidate rate that
+// the estimate is within 100 Hz of, or <default_rate> if there is no such
+// candidate (or the frame rate denominator is zero, so no estimate can be made).
+int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate)
+{
+	if (video_format.frame_rate_den == 0) {
+		// Nothing to scale by; dividing below would be a division by zero.
+		return default_rate;
+	}
+
+	size_t num_samples = len / 3 / 8;
+	size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den;
+
+	// See if we match or are very close to any of the mandatory HDMI sample rates.
+	const int candidate_sample_rates[] = { 32000, 44100, 48000 };
+	for (int rate : candidate_sample_rates) {
+		if (abs(int(num_samples_per_second) - rate) <= 100) {
+			return rate;
+		}
+	}
+
+	// Note: size_t needs %zu; %ld is only right where long happens to be the same width.
+	fprintf(stderr, "%zu samples at %d/%d fps (%zu Hz) matches no known sample rate, keeping capture at %d Hz\n",
+		num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate);
+	return default_rate;
+}
+
} // namespace
FrameAllocator::~FrameAllocator() {}
void dump_audio_block(uint8_t *audio_start, size_t audio_len)
{
- fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+ if (audiofp != nullptr) {
+ fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+ }
}
void BMUSBCapture::dequeue_thread_func()
{
+ char thread_name[16];
+ snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index);
+ pthread_setname_np(pthread_self(), thread_name);
+
if (has_dequeue_callbacks) {
dequeue_init_callback();
}
+ size_t last_sample_rate = 48000;
while (!dequeue_thread_should_quit) {
unique_lock<mutex> lock(queue_lock);
queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
AudioFormat audio_format;
audio_format.bits_per_sample = 24;
audio_format.num_channels = 8;
+ audio_format.sample_rate = last_sample_rate;
if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
printf("Video block 0x%04x without corresponding audio block, dropping.\n",
video_timecode);
VideoFormat video_format;
audio_format.id = audio_frame.format;
if (decode_video_format(video_frame.format, &video_format)) {
+ if (audio_frame.frame.len != 0) {
+ audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate);
+ last_sample_rate = audio_format.sample_rate;
+ }
frame_callback(video_timecode,
video_frame.frame, HEADER_SIZE, video_format,
audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
} else {
+ video_frame_allocator->release_frame(video_frame.frame);
+ audio_format.sample_rate = last_sample_rate;
frame_callback(video_timecode,
FrameAllocator::Frame(), 0, video_format,
audio_frame.frame, AUDIO_HEADER_SIZE, audio_format);
//dump_audio_block();
queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
}
- //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
- // format, timecode, read_current_audio_block);
+ //printf("Found audio block start, format 0x%04x timecode 0x%04x\n",
+ // format, timecode);
current_audio_frame = audio_frame_allocator->alloc_frame();
}
}
//dump_frame();
} else {
+ if (current_frame->data_copy != nullptr) {
+ memcpy(current_frame->data_copy + current_frame->len, start, bytes);
+ }
if (current_frame->interleaved) {
uint8_t *data = current_frame->data + current_frame->len / 2;
uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
{
const __m256i needle = _mm256_set1_epi8(sync_char);
+ size_t bytes_copied;
const __restrict __m256i *in = (const __m256i *)aligned_start;
if (current_frame->interleaved) {
__restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
++out1;
++out2;
}
- current_frame->len += (uint8_t *)in - aligned_start;
+ bytes_copied = (uint8_t *)in - aligned_start;
} else {
- __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
+ uint8_t *old_end = current_frame->data + current_frame->len;
+ __m256i *out = (__m256i *)old_end;
while (in < (const __m256i *)limit) {
__m256i data = _mm256_load_si256(in);
_mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used.
++in;
++out;
}
- current_frame->len = (uint8_t *)out - current_frame->data;
+ bytes_copied = (uint8_t *)out - old_end;
}
+ if (current_frame->data_copy != nullptr) {
+ // TODO: It would be somewhat more cache-efficient to write this in the
+ // same loop as above. However, it might not be worth the extra complexity.
+ memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
+ }
+ current_frame->len += bytes_copied;
//printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
return (const uint8_t *)in;
const __m128i needle = _mm_set1_epi8(sync_char);
const __m128i *in = (const __m128i *)aligned_start;
+ size_t bytes_copied;
if (current_frame->interleaved) {
__m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
__m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
++out1;
++out2;
}
- current_frame->len += (uint8_t *)in - aligned_start;
+ bytes_copied = (uint8_t *)in - aligned_start;
} else {
- __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
+ uint8_t *old_end = current_frame->data + current_frame->len;
+ __m128i *out = (__m128i *)old_end;
while (in < (const __m128i *)limit) {
__m128i data = _mm_load_si128(in);
_mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used.
++in;
++out;
}
- current_frame->len = (uint8_t *)out - current_frame->data;
+ bytes_copied = (uint8_t *)out - old_end;
+ }
+ if (current_frame->data_copy != nullptr) {
+ // TODO: It would be somewhat more cache-efficient to write this in the
+ // same loop as above. However, it might not be worth the extra complexity.
+ memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
}
+ current_frame->len += bytes_copied;
//printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
return (const uint8_t *)in;
decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
// Update the transfer with the new assumed width, if we're in the process of changing formats.
- change_xfer_size_for_width(usb->assumed_frame_width, xfr);
+ change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr);
}
}
if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) {
printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
}
+ pthread_setname_np(pthread_self(), "bmusb_usb_drv");
while (!should_quit) {
timeval sec { 1, 0 };
int rc = libusb_handle_events_timeout(nullptr, &sec);
return ret;
}
+// Selects the pixel format to capture in. The choice is stored in
+// current_pixel_format and then handed over to update_capture_mode(),
+// which builds the capture mode word from it.
+void BMUSBCapture::set_pixel_format(PixelFormat pixel_format)
+{
+	this->current_pixel_format = pixel_format;
+	this->update_capture_mode();
+}
+
void BMUSBCapture::configure_card()
{
if (video_frame_allocator == nullptr) {
// set up isochronous transfers for audio and video
for (int e = 3; e <= 4; ++e) {
- //int num_transfers = (e == 3) ? 6 : 6;
int num_transfers = 6;
for (int i = 0; i < num_transfers; ++i) {
size_t buf_size;
int num_iso_pack, size;
if (e == 3) {
// Allocate for minimum width (because that will give us the most
- // number of packets, so we don't need to reallocated, but we'll
+ // number of packets, so we don't need to reallocate), but we'll
// default to 720p for the first frame.
- size = find_xfer_size_for_width(MIN_WIDTH);
+ size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH);
num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size;
buf_size = USB_VIDEO_TRANSFER_SIZE;
} else {
xfr->user_data = this;
if (e == 3) {
- change_xfer_size_for_width(assumed_frame_width, xfr);
+ change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr);
}
iso_xfrs.push_back(xfr);
// Asks the USB event thread to exit and waits until it has done so.
// The order matters: The quit flag must be set before the event handler
// is interrupted, and both before the join.
void BMUSBCapture::stop_bm_thread()
{
// The USB thread loops on this flag between calls to libusb_handle_events_timeout().
should_quit = true;
// Break the USB thread out of its event wait, so that it sees the flag
// right away instead of only after its one-second timeout has run out.
+ libusb_interrupt_event_handler(nullptr);
usb_thread.join();
}
void BMUSBCapture::update_capture_mode()
{
- // clearing the 0x20000000 bit seems to activate 10-bit capture (v210).
- // clearing the 0x08000000 bit seems to change the capture format (other source?)
- uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input);
+ if (devh == nullptr) {
+ return;
+ }
+
+ // Clearing the 0x08000000 bit seems to change the capture format (other source?).
+ uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input);
+ if (current_pixel_format == PixelFormat_8BitYCbCr) {
+ mode |= htonl(0x20000000);
+ } else {
+ assert(current_pixel_format == PixelFormat_10BitYCbCr);
+ }
int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT,
/*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);