X-Git-Url: https://git.sesse.net/?p=bmusb;a=blobdiff_plain;f=bmusb.cpp;h=0169d062dad8d1823ed441e29cc3938eee2c2e09;hp=663a235f05a75698283d6dd5f5ea59295c2f0db3;hb=HEAD;hpb=8b4a8b1610810c50ac7afe68577cd86cf04b2b06 diff --git a/bmusb.cpp b/bmusb.cpp index 663a235..19a9da1 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,4 +1,4 @@ -// Intensity Shuttle USB3 capture driver, v0.6.0 +// Intensity Shuttle USB3 capture driver, v0.7.8 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) @@ -199,8 +199,10 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma constexpr VideoFormatEntry entries[] = { { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. @@ -243,7 +245,7 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma // There are seemingly no direct indicators of sample rate; you just get // one frame's worth and have to guess from that. -int guess_sample_rate(const VideoFormat &video_format, size_t len) +int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate) { size_t num_samples = len / 3 / 8; size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den; @@ -251,14 +253,14 @@ int guess_sample_rate(const VideoFormat &video_format, size_t len) // See if we match or are very close to any of the mandatory HDMI sample rates. const int candidate_sample_rates[] = { 32000, 44100, 48000 }; for (int rate : candidate_sample_rates) { - if (abs(int(num_samples_per_second) - rate) < 50) { + if (abs(int(num_samples_per_second) - rate) <= 100) { return rate; } } - fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, assuming 48000 Hz\n", - num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second); - return 48000; + fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n", + num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate); + return default_rate; } } // namespace @@ -340,7 +342,9 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) void dump_audio_block(uint8_t *audio_start, size_t audio_len) { - fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + if (audiofp != nullptr) { + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + } } void BMUSBCapture::dequeue_thread_func() @@ -406,13 +410,14 @@ void BMUSBCapture::dequeue_thread_func() audio_format.id = audio_frame.format; if (decode_video_format(video_frame.format, &video_format)) { if (audio_frame.frame.len != 0) { - audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len); + audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate); last_sample_rate = audio_format.sample_rate; } frame_callback(video_timecode, video_frame.frame, HEADER_SIZE, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); } else { + video_frame_allocator->release_frame(video_frame.frame); audio_format.sample_rate = last_sample_rate; frame_callback(video_timecode, FrameAllocator::Frame(), 0, video_format, @@ -533,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } //dump_frame(); } else { + if (current_frame->data_copy != nullptr) { + memcpy(current_frame->data_copy + current_frame->len, start, bytes); + } if (current_frame->interleaved) { uint8_t *data = current_frame->data + current_frame->len / 2; uint8_t *data2 = current_frame->data2 + current_frame->len / 2; @@ -682,6 +690,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, { const __m256i needle = _mm256_set1_epi8(sync_char); + size_t bytes_copied; const __restrict __m256i *in = (const __m256i *)aligned_start; if (current_frame->interleaved) { __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); @@ -722,9 +731,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m256i *out = (__m256i *)old_end; while (in < (const __m256i *)limit) { __m256i data = _mm256_load_si256(in); _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. @@ -736,8 +746,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); + } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -749,6 +765,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const __m128i needle = _mm_set1_epi8(sync_char); const __m128i *in = (const __m128i *)aligned_start; + size_t bytes_copied; if (current_frame->interleaved) { __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); @@ -779,9 +796,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m128i *out = (__m128i *)old_end; while (in < (const __m128i *)limit) { __m128i data = _mm_load_si128(in); _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. @@ -793,8 +811,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; + } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -1477,6 +1501,7 @@ void BMUSBCapture::start_bm_thread() void BMUSBCapture::stop_bm_thread() { should_quit = true; + libusb_interrupt_event_handler(nullptr); usb_thread.join(); }