X-Git-Url: https://git.sesse.net/?p=bmusb;a=blobdiff_plain;f=bmusb.cpp;h=0169d062dad8d1823ed441e29cc3938eee2c2e09;hp=9c0ff26e7b767cf522ce5184f9be30130e5c61d9;hb=HEAD;hpb=227166d4398263e1191bdfe244f62bc38aaf4c1f diff --git a/bmusb.cpp b/bmusb.cpp index 9c0ff26..19a9da1 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,5 +1,5 @@ -// Intensity Shuttle USB3 capture driver, v0.5.3 -// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable +// Intensity Shuttle USB3 capture driver, v0.7.8 +// Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -62,14 +63,24 @@ FILE *audiofp; thread usb_thread; atomic should_quit; -int find_xfer_size_for_width(int width) +int v210_stride(int width) +{ + return (width + 5) / 6 * 4 * sizeof(uint32_t); +} + +int find_xfer_size_for_width(PixelFormat pixel_format, int width) { // Video seems to require isochronous packets scaled with the width; // seemingly six lines is about right, rounded up to the required 1kB // multiple. - int size = width * 2 * 6; // Note that for 10-bit input, you'll need to increase size accordingly. - //size = size * 4 / 3; + int stride; + if (pixel_format == PixelFormat_10BitYCbCr) { + stride = v210_stride(width); + } else { + stride = width * sizeof(uint16_t); + } + int size = stride * 6; if (size % 1024 != 0) { size &= ~1023; size += 1024; @@ -77,10 +88,10 @@ int find_xfer_size_for_width(int width) return size; } -void change_xfer_size_for_width(int width, libusb_transfer *xfr) +void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr) { assert(width >= MIN_WIDTH); - size_t size = find_xfer_size_for_width(width); + size_t size = find_xfer_size_for_width(pixel_format, width); int num_iso_pack = xfr->length / size; if (num_iso_pack != xfr->num_iso_packets || size != xfr->iso_packet_desc[0].length) { @@ -111,6 +122,7 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma // It's a strange thing, but what can you do. decoded_video_format->width = 720; decoded_video_format->height = 525; + decoded_video_format->stride = 720 * 2; decoded_video_format->extra_lines_top = 0; decoded_video_format->extra_lines_bottom = 0; decoded_video_format->frame_rate_nom = 3013; @@ -118,11 +130,12 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma decoded_video_format->has_signal = false; return true; } - if ((video_format & 0xe800) != 0xe800) { + if ((video_format & 0xe000) != 0xe000) { printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", video_format); decoded_video_format->width = 0; decoded_video_format->height = 0; + decoded_video_format->stride = 0; decoded_video_format->extra_lines_top = 0; decoded_video_format->extra_lines_bottom = 0; decoded_video_format->frame_rate_nom = 60; @@ -134,9 +147,16 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma decoded_video_format->has_signal = true; // NTSC (480i59.94, I suppose). A special case, see below. - if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) { + if ((video_format & ~0x0800) == 0xe101 || + (video_format & ~0x0800) == 0xe1c1 || + (video_format & ~0x0800) == 0xe001) { decoded_video_format->width = 720; decoded_video_format->height = 480; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } decoded_video_format->extra_lines_top = 17; decoded_video_format->extra_lines_bottom = 28; decoded_video_format->frame_rate_nom = 30000; @@ -147,9 +167,18 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma } // PAL (576i50, I suppose). A special case, see below. - if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) { + if ((video_format & ~0x0800) == 0xe109 || + (video_format & ~0x0800) == 0xe1c9 || + (video_format & ~0x0800) == 0xe009 || + (video_format & ~0x0800) == 0xe3e9 || + (video_format & ~0x0800) == 0xe3e1) { decoded_video_format->width = 720; decoded_video_format->height = 576; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } decoded_video_format->extra_lines_top = 22; decoded_video_format->extra_lines_bottom = 27; decoded_video_format->frame_rate_nom = 25; @@ -162,31 +191,39 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma // 0x8 seems to be a flag about availability of deep color on the input, // except when it's not (e.g. it's the only difference between NTSC // and PAL). Rather confusing. But we clear it here nevertheless, because - // usually it doesn't mean anything. + // usually it doesn't mean anything. 0x0800 appears to be 8-bit input + // (as opposed to 10-bit). // // 0x4 is a flag I've only seen from the D4. I don't know what it is. uint16_t normalized_video_format = video_format & ~0xe80c; constexpr VideoFormatEntry entries[] = { { 0x01f1, 720, 480, 0, 40, 5, 60000, 1001, false }, // 480p59.94 (believed). { 0x0131, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. + { 0x0151, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50. { 0x0011, 720, 576, 0, 44, 5, 50, 1, false }, // 576p50 (5:4). { 0x0143, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. + { 0x0161, 1280, 720, 0, 25, 5, 50, 1, false }, // 720p50. { 0x0103, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0125, 1280, 720, 0, 25, 5, 60, 1, false }, // 720p60. { 0x0121, 1280, 720, 0, 25, 5, 60000, 1001, false }, // 720p59.94. - { 0x01c3, 1920, 1080, 0, 0, 0, 30, 1, false }, // 1080p30. + { 0x01c3, 1920, 1080, 0, 41, 4, 30, 1, false }, // 1080p30. { 0x0003, 1920, 1080, 583, 20, 25, 30, 1, true }, // 1080i60. - { 0x01e1, 1920, 1080, 0, 0, 0, 30000, 1001, false }, // 1080p29.97. + { 0x01e1, 1920, 1080, 0, 41, 4, 30000, 1001, false }, // 1080p29.97. { 0x0021, 1920, 1080, 583, 20, 25, 30000, 1001, true }, // 1080i59.94. - { 0x0063, 1920, 1080, 0, 0, 0, 25, 1, false }, // 1080p25. - { 0x0043, 1920, 1080, 0, 0, 0, 25, 1, true }, // 1080p50. - { 0x008e, 1920, 1080, 0, 0, 0, 24, 1, false }, // 1080p24. - { 0x00a1, 1920, 1080, 0, 0, 0, 24000, 1001, false }, // 1080p23.98. + { 0x0063, 1920, 1080, 0, 41, 4, 25, 1, false }, // 1080p25. + { 0x0043, 1920, 1080, 583, 20, 25, 25, 1, true }, // 1080i50. + { 0x0083, 1920, 1080, 0, 41, 4, 24, 1, false }, // 1080p24. + { 0x00a1, 1920, 1080, 0, 41, 4, 24000, 1001, false }, // 1080p23.98. }; for (const VideoFormatEntry &entry : entries) { if (normalized_video_format == entry.normalized_video_format) { decoded_video_format->width = entry.width; decoded_video_format->height = entry.height; + if (video_format & 0x0800) { + decoded_video_format->stride = entry.width * 2; + } else { + decoded_video_format->stride = v210_stride(entry.width); + } decoded_video_format->second_field_start = entry.second_field_start; decoded_video_format->extra_lines_top = entry.extra_lines_top; decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; @@ -200,11 +237,32 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma printf("Unknown video format 0x%04x (normalized 0x%04x). Assuming 720p60.\n", video_format, normalized_video_format); decoded_video_format->width = 1280; decoded_video_format->height = 720; + decoded_video_format->stride = 1280 * 2; decoded_video_format->frame_rate_nom = 60; decoded_video_format->frame_rate_den = 1; return false; } +// There are seemingly no direct indicators of sample rate; you just get +// one frame's worth and have to guess from that. +int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_rate) +{ + size_t num_samples = len / 3 / 8; + size_t num_samples_per_second = num_samples * video_format.frame_rate_nom / video_format.frame_rate_den; + + // See if we match or are very close to any of the mandatory HDMI sample rates. + const int candidate_sample_rates[] = { 32000, 44100, 48000 }; + for (int rate : candidate_sample_rates) { + if (abs(int(num_samples_per_second) - rate) <= 100) { + return rate; + } + } + + fprintf(stderr, "%ld samples at %d/%d fps (%ld Hz) matches no known sample rate, keeping capture at %d Hz\n", + num_samples, video_format.frame_rate_nom, video_format.frame_rate_den, num_samples_per_second, default_rate); + return default_rate; +} + } // namespace FrameAllocator::~FrameAllocator() {} @@ -284,14 +342,21 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) void dump_audio_block(uint8_t *audio_start, size_t audio_len) { - fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + if (audiofp != nullptr) { + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); + } } void BMUSBCapture::dequeue_thread_func() { + char thread_name[16]; + snprintf(thread_name, sizeof(thread_name), "bmusb_dequeue_%d", card_index); + pthread_setname_np(pthread_self(), thread_name); + if (has_dequeue_callbacks) { dequeue_init_callback(); } + size_t last_sample_rate = 48000; while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); }); @@ -303,6 +368,7 @@ void BMUSBCapture::dequeue_thread_func() AudioFormat audio_format; audio_format.bits_per_sample = 24; audio_format.num_channels = 8; + audio_format.sample_rate = last_sample_rate; if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) { printf("Video block 0x%04x without corresponding audio block, dropping.\n", video_timecode); @@ -343,10 +409,16 @@ void BMUSBCapture::dequeue_thread_func() VideoFormat video_format; audio_format.id = audio_frame.format; if (decode_video_format(video_frame.format, &video_format)) { + if (audio_frame.frame.len != 0) { + audio_format.sample_rate = guess_sample_rate(video_format, audio_frame.frame.len, last_sample_rate); + last_sample_rate = audio_format.sample_rate; + } frame_callback(video_timecode, video_frame.frame, HEADER_SIZE, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); } else { + video_frame_allocator->release_frame(video_frame.frame); + audio_format.sample_rate = last_sample_rate; frame_callback(video_timecode, FrameAllocator::Frame(), 0, video_format, audio_frame.frame, AUDIO_HEADER_SIZE, audio_format); @@ -413,8 +485,8 @@ void BMUSBCapture::start_new_audio_block(const uint8_t *start) //dump_audio_block(); queue_frame(format, timecode, current_audio_frame, &pending_audio_frames); } - //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n", - // format, timecode, read_current_audio_block); + //printf("Found audio block start, format 0x%04x timecode 0x%04x\n", + // format, timecode); current_audio_frame = audio_frame_allocator->alloc_frame(); } @@ -466,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } //dump_frame(); } else { + if (current_frame->data_copy != nullptr) { + memcpy(current_frame->data_copy + current_frame->len, start, bytes); + } if (current_frame->interleaved) { uint8_t *data = current_frame->data + current_frame->len / 2; uint8_t *data2 = current_frame->data2 + current_frame->len / 2; @@ -615,6 +690,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, { const __m256i needle = _mm256_set1_epi8(sync_char); + size_t bytes_copied; const __restrict __m256i *in = (const __m256i *)aligned_start; if (current_frame->interleaved) { __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); @@ -655,9 +731,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m256i *out = (__m256i *)old_end; while (in < (const __m256i *)limit) { __m256i data = _mm256_load_si256(in); _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. @@ -669,8 +746,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); + } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -682,6 +765,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, const __m128i needle = _mm_set1_epi8(sync_char); const __m128i *in = (const __m128i *)aligned_start; + size_t bytes_copied; if (current_frame->interleaved) { __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); @@ -712,9 +796,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++out1; ++out2; } - current_frame->len += (uint8_t *)in - aligned_start; + bytes_copied = (uint8_t *)in - aligned_start; } else { - __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + uint8_t *old_end = current_frame->data + current_frame->len; + __m128i *out = (__m128i *)old_end; while (in < (const __m128i *)limit) { __m128i data = _mm_load_si128(in); _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. @@ -726,8 +811,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame, ++in; ++out; } - current_frame->len = (uint8_t *)out - current_frame->data; + bytes_copied = (uint8_t *)out - old_end; + } + if (current_frame->data_copy != nullptr) { + // TODO: It would be somewhat more cache-efficient to write this in the + // same loop as above. However, it might not be worth the extra complexity. + memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied); } + current_frame->len += bytes_copied; //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); return (const uint8_t *)in; @@ -808,7 +899,7 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); // Update the transfer with the new assumed width, if we're in the process of changing formats. - change_xfer_size_for_width(usb->assumed_frame_width, xfr); + change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr); } } if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { @@ -886,6 +977,7 @@ void BMUSBCapture::usb_thread_func() if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) { printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); } + pthread_setname_np(pthread_self(), "bmusb_usb_drv"); while (!should_quit) { timeval sec { 1, 0 }; int rc = libusb_handle_events_timeout(nullptr, &sec); @@ -1037,6 +1129,12 @@ unsigned BMUSBCapture::num_cards() return ret; } +void BMUSBCapture::set_pixel_format(PixelFormat pixel_format) +{ + current_pixel_format = pixel_format; + update_capture_mode(); +} + void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { @@ -1295,16 +1393,15 @@ void BMUSBCapture::configure_card() // set up isochronous transfers for audio and video for (int e = 3; e <= 4; ++e) { - //int num_transfers = (e == 3) ? 6 : 6; int num_transfers = 6; for (int i = 0; i < num_transfers; ++i) { size_t buf_size; int num_iso_pack, size; if (e == 3) { // Allocate for minimum width (because that will give us the most - // number of packets, so we don't need to reallocated, but we'll + // number of packets, so we don't need to reallocate, but we'll // default to 720p for the first frame. - size = find_xfer_size_for_width(MIN_WIDTH); + size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH); num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size; buf_size = USB_VIDEO_TRANSFER_SIZE; } else { @@ -1343,7 +1440,7 @@ void BMUSBCapture::configure_card() xfr->user_data = this; if (e == 3) { - change_xfer_size_for_width(assumed_frame_width, xfr); + change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr); } iso_xfrs.push_back(xfr); @@ -1404,6 +1501,7 @@ void BMUSBCapture::start_bm_thread() void BMUSBCapture::stop_bm_thread() { should_quit = true; + libusb_interrupt_event_handler(nullptr); usb_thread.join(); } @@ -1460,9 +1558,17 @@ void BMUSBCapture::set_audio_input(uint32_t audio_input_id) void BMUSBCapture::update_capture_mode() { - // clearing the 0x20000000 bit seems to activate 10-bit capture (v210). - // clearing the 0x08000000 bit seems to change the capture format (other source?) - uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input); + if (devh == nullptr) { + return; + } + + // Clearing the 0x08000000 bit seems to change the capture format (other source?). + uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input); + if (current_pixel_format == PixelFormat_8BitYCbCr) { + mode |= htonl(0x20000000); + } else { + assert(current_pixel_format == PixelFormat_10BitYCbCr); + } int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT, /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0);