X-Git-Url: https://git.sesse.net/?p=bmusb;a=blobdiff_plain;f=bmusb.cpp;h=0169d062dad8d1823ed441e29cc3938eee2c2e09;hp=088fc9853f5d6a3df87ab00ac44971b768200f64;hb=HEAD;hpb=01ddb8f836114c07cff3ca040d9ed2c946b2fdbf diff --git a/bmusb.cpp b/bmusb.cpp index 088fc98..19a9da1 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,4 +1,4 @@ -// Intensity Shuttle USB3 capture driver, v0.5.4 +// Intensity Shuttle USB3 capture driver, v0.7.8 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) @@ -199,8 +199,10 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma constexpr VideoFormatEntry entries[] = { { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. @@ -241,6 +243,26 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma return false; } +// There are seemingly no direct indicators of sample rate; you just get +// one frame's worth and have to guess from that. +int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate) +{ + size_t num_samples = len / 3 / 8; + size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den; + + // See if we match or are very close to any of the mandatory HDMI sample rates. + const int candidate_sample_rates[] = { 32000, 44100, 48000 }; + for (int rate : candidate_sample_rates) { + if (abs(int(num_samples_per_second) - rate) <= 100) { + return rate; + } + } + + fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n", + num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate); + return default_rate; +} + } // namespace FrameAllocator::~FrameAllocator() {} @@ -320,7 +342,9 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) void dump_audio_block(uint8_t *audio_start, size_t audio_len) { - fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + if (audiofp != nullptr) { + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + } } void BMUSBCapture::dequeue_thread_func() @@ -332,6 +356,7 @@ void BMUSBCapture::dequeue_thread_func() if (has_dequeue_callbacks) { dequeue_init_callback(); } + size_t last_sample_rate = 48000; while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); }); @@ -343,6 +368,7 @@ void BMUSBCapture::dequeue_thread_func() AudioFormat audio_format; audio_format.bits_per_sample = 24; audio_format.num_channels = 8; + audio_format.sample_rate = last_sample_rate; if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) { printf("Video block 0x%04x without corresponding audio block, dropping.\n", video_timecode); @@ -383,10 +409,16 @@ void BMUSBCapture::dequeue_thread_func() VideoFormat video_format; audio_format.id = audio_frame.format; if (decode_video_format(video_frame.format, &video_format)) { + if (audio_frame.frame.len != 0) { + audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate); + last_sample_rate = audio_format.sample_rate; + } frame_callback(video_timecode, video_frame.frame, HEADER_SIZE, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); } else { + video_frame_allocator->release_frame(video_frame.frame); + audio_format.sample_rate = last_sample_rate; frame_callback(video_timecode, FrameAllocator::Frame(), 0, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); @@ -453,8 +485,8 @@ void BMUSBCapture::start_new_audio_block(const uint8_t *start) //dump_audio_block(); queue_frame(format, timecode, current_audio_frame, &pending_audio_frames); } - //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n", - // format, timecode, read_current_audio_block); + //printf("Found audio block start, format 0x%04x timecode 0x%04x\n", + // format, timecode); current_audio_frame = audio_frame_allocator->alloc_frame(); } @@ -506,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } //dump_frame(); } else { + if (current_frame->data_copy != nullptr) { + memcpy(current_frame->data_copy + current_frame->len, start, bytes); + } if (current_frame->interleaved) { uint8_t *data = current_frame->data + current_frame->len / 2; uint8_t *data2 = current_frame->data2 + current_frame->len / 2; @@ -655,6 +690,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, { const __m256i needle = _mm256_set1_epi8(sync_char); + size_t bytes_copied; const __restrict __m256i *in = (const __m256i *)aligned_start; if (current_frame->interleaved) { __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); @@ -695,9 +731,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m256i *out = (__m256i *)old_end; while (in < (const __m256i *)limit) { __m256i data = _mm256_load_si256(in); _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. @@ -709,8 +746,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); + } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -722,6 +765,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const __m128i needle = _mm_set1_epi8(sync_char); const __m128i *in = (const __m128i *)aligned_start; + size_t bytes_copied; if (current_frame->interleaved) { __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); @@ -752,9 +796,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m128i *out = (__m128i *)old_end; while (in < (const __m128i *)limit) { __m128i data = _mm_load_si128(in); _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. @@ -766,8 +811,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; + } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -1450,6 +1501,7 @@ void BMUSBCapture::start_bm_thread() void BMUSBCapture::stop_bm_thread() { should_quit = true; + libusb_interrupt_event_handler(nullptr); usb_thread.join(); }