From 01ddb8f836114c07cff3ca040d9ed2c946b2fdbf Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 21 Feb 2017 18:40:03 +0100 Subject: [PATCH] Support 10-bit capture. --- bmusb.cpp | 84 +++++++++++++++++++++++++++++++++----------- bmusb/bmusb.h | 48 +++++++++++++++++++++++++ bmusb/fake_capture.h | 16 +++++++++ fake_capture.cpp | 43 +++++++++++++++++++++-- 4 files changed, 167 insertions(+), 24 deletions(-) diff --git a/bmusb.cpp b/bmusb.cpp index 3d090d7..088fc98 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -1,5 +1,5 @@ // Intensity Shuttle USB3 capture driver, v0.5.4 -// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable +// Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable // (can do captures for hours at a time with no drops), except during startup // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. @@ -63,14 +63,24 @@ FILE *audiofp; thread usb_thread; atomic should_quit; -int find_xfer_size_for_width(int width) +int v210_stride(int width) +{ + return (width + 5) / 6 * 4 * sizeof(uint32_t); +} + +int find_xfer_size_for_width(PixelFormat pixel_format, int width) { // Video seems to require isochronous packets scaled with the width; // seemingly six lines is about right, rounded up to the required 1kB // multiple. - int size = width * 2 * 6; // Note that for 10-bit input, you'll need to increase size accordingly. - //size = size * 4 / 3; + int stride; + if (pixel_format == PixelFormat_10BitYCbCr) { + stride = v210_stride(width); + } else { + stride = width * sizeof(uint16_t); + } + int size = stride * 6; if (size % 1024 != 0) { size &= ~1023; size += 1024; @@ -78,10 +88,10 @@ int find_xfer_size_for_width(int width) return size; } -void change_xfer_size_for_width(int width, libusb_transfer *xfr) +void change_xfer_size_for_width(PixelFormat pixel_format, int width, libusb_transfer *xfr) { assert(width >= MIN_WIDTH); - size_t size = find_xfer_size_for_width(width); + size_t size = find_xfer_size_for_width(pixel_format, width); int num_iso_pack = xfr->length / size; if (num_iso_pack != xfr->num_iso_packets || size != xfr->iso_packet_desc[0].length) { @@ -120,7 +130,7 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma decoded_video_format->has_signal = false; return true; } - if ((video_format & 0xe800) != 0xe800) { + if ((video_format & 0xe000) != 0xe000) { printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n", video_format); decoded_video_format->width = 0; @@ -137,10 +147,16 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma decoded_video_format->has_signal = true; // NTSC (480i59.94, I suppose). A special case, see below. - if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) { + if ((video_format & ~0x0800) == 0xe101 || + (video_format & ~0x0800) == 0xe1c1 || + (video_format & ~0x0800) == 0xe001) { decoded_video_format->width = 720; decoded_video_format->height = 480; - decoded_video_format->stride = 720 * 2; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } decoded_video_format->extra_lines_top = 17; decoded_video_format->extra_lines_bottom = 28; decoded_video_format->frame_rate_nom = 30000; @@ -151,10 +167,18 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma } // PAL (576i50, I suppose). A special case, see below. - if (video_format == 0xe909 || video_format == 0xe9c9 || video_format == 0xe809 || video_format == 0xebe9 || video_format == 0xebe1) { + if ((video_format & ~0x0800) == 0xe109 || + (video_format & ~0x0800) == 0xe1c9 || + (video_format & ~0x0800) == 0xe009 || + (video_format & ~0x0800) == 0xe3e9 || + (video_format & ~0x0800) == 0xe3e1) { decoded_video_format->width = 720; decoded_video_format->height = 576; - decoded_video_format->stride = 720 * 2; + if (video_format & 0x0800) { + decoded_video_format->stride = 720 * 2; + } else { + decoded_video_format->stride = v210_stride(720); + } decoded_video_format->extra_lines_top = 22; decoded_video_format->extra_lines_bottom = 27; decoded_video_format->frame_rate_nom = 25; @@ -167,7 +191,8 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma // 0x8 seems to be a flag about availability of deep color on the input, // except when it's not (e.g. it's the only difference between NTSC // and PAL). Rather confusing. But we clear it here nevertheless, because - // usually it doesn't mean anything. + // usually it doesn't mean anything. 0x0800 appears to be 8-bit input + // (as opposed to 10-bit). // // 0x4 is a flag I've only seen from the D4. I don't know what it is. uint16_t normalized_video_format = video_format & ~0xe80c; @@ -192,7 +217,11 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma if (normalized_video_format == entry.normalized_video_format) { decoded_video_format->width = entry.width; decoded_video_format->height = entry.height; - decoded_video_format->stride = entry.width * 2; + if (video_format & 0x0800) { + decoded_video_format->stride = entry.width * 2; + } else { + decoded_video_format->stride = v210_stride(entry.width); + } decoded_video_format->second_field_start = entry.second_field_start; decoded_video_format->extra_lines_top = entry.extra_lines_top; decoded_video_format->extra_lines_bottom = entry.extra_lines_bottom; @@ -819,7 +848,7 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); // Update the transfer with the new assumed width, if we're in the process of changing formats. - change_xfer_size_for_width(usb->assumed_frame_width, xfr); + change_xfer_size_for_width(usb->current_pixel_format, usb->assumed_frame_width, xfr); } } if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { @@ -1049,6 +1078,12 @@ unsigned BMUSBCapture::num_cards() return ret; } +void BMUSBCapture::set_pixel_format(PixelFormat pixel_format) +{ + current_pixel_format = pixel_format; + update_capture_mode(); +} + void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { @@ -1307,16 +1342,15 @@ void BMUSBCapture::configure_card() // set up isochronous transfers for audio and video for (int e = 3; e <= 4; ++e) { - //int num_transfers = (e == 3) ? 6 : 6; int num_transfers = 6; for (int i = 0; i < num_transfers; ++i) { size_t buf_size; int num_iso_pack, size; if (e == 3) { // Allocate for minimum width (because that will give us the most - // number of packets, so we don't need to reallocated, but we'll + // number of packets, so we don't need to reallocate, but we'll // default to 720p for the first frame. - size = find_xfer_size_for_width(MIN_WIDTH); + size = find_xfer_size_for_width(PixelFormat_8BitYCbCr, MIN_WIDTH); num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size; buf_size = USB_VIDEO_TRANSFER_SIZE; } else { @@ -1355,7 +1389,7 @@ void BMUSBCapture::configure_card() xfr->user_data = this; if (e == 3) { - change_xfer_size_for_width(assumed_frame_width, xfr); + change_xfer_size_for_width(current_pixel_format, assumed_frame_width, xfr); } iso_xfrs.push_back(xfr); @@ -1472,9 +1506,17 @@ void BMUSBCapture::set_audio_input(uint32_t audio_input_id) void BMUSBCapture::update_capture_mode() { - // clearing the 0x20000000 bit seems to activate 10-bit capture (v210). - // clearing the 0x08000000 bit seems to change the capture format (other source?) - uint32_t mode = htonl(0x29000000 | current_video_input | current_audio_input); + if (devh == nullptr) { + return; + } + + // Clearing the 0x08000000 bit seems to change the capture format (other source?). + uint32_t mode = htonl(0x09000000 | current_video_input | current_audio_input); + if (current_pixel_format == PixelFormat_8BitYCbCr) { + mode |= htonl(0x20000000); + } else { + assert(current_pixel_format == PixelFormat_10BitYCbCr); + } int rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_OUT, /*request=*/215, /*value=*/0, /*index=*/0, (unsigned char *)&mode, sizeof(mode), /*timeout=*/0); diff --git a/bmusb/bmusb.h b/bmusb/bmusb.h index ed106a6..83677af 100644 --- a/bmusb/bmusb.h +++ b/bmusb/bmusb.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,9 @@ class FrameAllocator { // If set to true, every other byte will go to data and to data2. // If so, and are still about the number of total bytes // so if size == 1024, there's 512 bytes in data and 512 in data2. + // + // This doesn't really make any sense if you asked for the + // 10BitYCbCr pixel format. bool interleaved = false; // At what point this frame was received. Note that this marks the @@ -124,6 +128,32 @@ struct AudioFormat { unsigned num_channels = 0; }; +enum PixelFormat { + // 8-bit 4:2:2 in the standard Cb Y Cr Y order (UYVY). + // This is the default. + PixelFormat_8BitYCbCr, + + // 10-bit 4:2:2 in v210 order. Six pixels (six Y', three Cb, + // three Cr) are packed into four 32-bit little-endian ints + // in the following pattern (see e.g. the DeckLink documentation + // for reference): + // + // A B G R + // ----------------- + // X Cr0 Y0 Cb0 + // X Y2 Cb2 Y1 + // X Cb4 Y3 Cr2 + // X Y5 Cr4 Y4 + // + // If you read in RGB order and ignore the unused top bits, + // this is essentially Cb Y Cr Y order, just like UYVY is. + // + // Note that unlike true v210, there is no guarantee about + // 128-byte line alignment (or lack thereof); you should check + // the stride member of VideoFormat. + PixelFormat_10BitYCbCr +}; + typedef std::function @@ -140,6 +170,11 @@ class CaptureInterface { virtual uint32_t get_current_video_mode() const = 0; virtual void set_video_mode(uint32_t video_mode_id) = 0; + // TODO: Add a way to query this based on mode? + virtual std::set get_available_pixel_formats() const = 0; + virtual void set_pixel_format(PixelFormat pixel_format) = 0; + virtual PixelFormat get_current_pixel_format() const = 0; + virtual std::map get_available_video_inputs() const = 0; virtual void set_video_input(uint32_t video_input_id) = 0; virtual uint32_t get_current_video_input() const = 0; @@ -191,6 +226,18 @@ class BMUSBCapture : public CaptureInterface { // actually opening the card (in configure_card()). static unsigned num_cards(); + std::set get_available_pixel_formats() const override + { + return std::set{ PixelFormat_8BitYCbCr, PixelFormat_10BitYCbCr }; + } + + void set_pixel_format(PixelFormat pixel_format) override; + + PixelFormat get_current_pixel_format() const + { + return current_pixel_format; + } + std::map get_available_video_modes() const override; uint32_t get_current_video_mode() const override; void set_video_mode(uint32_t video_mode_id) override; @@ -343,6 +390,7 @@ class BMUSBCapture : public CaptureInterface { libusb_device_handle *devh = nullptr; uint32_t current_video_input = 0x00000000; // HDMI/SDI. uint32_t current_audio_input = 0x00000000; // Embedded. + PixelFormat current_pixel_format = PixelFormat_8BitYCbCr; bool disconnected = false; }; diff --git a/bmusb/fake_capture.h b/bmusb/fake_capture.h index 06f7ef2..ea9c380 100644 --- a/bmusb/fake_capture.h +++ b/bmusb/fake_capture.h @@ -65,6 +65,21 @@ public: void stop_dequeue_thread() override; bool get_disconnected() const override { return false; } + std::set get_available_pixel_formats() const override + { + return std::set{ PixelFormat_8BitYCbCr, PixelFormat_10BitYCbCr }; + } + + void set_pixel_format(PixelFormat pixel_format) override + { + current_pixel_format = pixel_format; + } + + PixelFormat get_current_pixel_format() const + { + return current_pixel_format; + } + std::map get_available_video_modes() const override; void set_video_mode(uint32_t video_mode_id) override; uint32_t get_current_video_mode() const override { return 0; } @@ -82,6 +97,7 @@ private: void make_tone(int32_t *out, unsigned num_stereo_samples, unsigned num_channels); unsigned width, height, fps, audio_sample_frequency; + PixelFormat current_pixel_format = PixelFormat_8BitYCbCr; int card_index; uint8_t y, cb, cr; diff --git a/fake_capture.cpp b/fake_capture.cpp index 2ef1d7d..5f4f5da 100644 --- a/fake_capture.cpp +++ b/fake_capture.cpp @@ -87,6 +87,26 @@ void memset4(uint8_t *s, const uint8_t c[4], size_t n) } } +void memset16(uint8_t *s, const uint32_t c[4], size_t n) +{ + size_t i = 0; +#if __SSE2__ + __m128i cc = *(__m128i *)c; + __m128i *out = (__m128i *)s; + + for ( ; i < (n & ~1); i += 2) { + _mm_storeu_si128(out++, cc); + _mm_storeu_si128(out++, cc); + } + + s = (uint8_t *)out; +#endif + for ( ; i < n; ++i) { + memcpy(s, c, 16); + s += 16; + } +} + } // namespace FakeCapture::FakeCapture(unsigned width, unsigned height, unsigned fps, unsigned audio_sample_frequency, int card_index, bool has_audio) @@ -246,6 +266,11 @@ void FakeCapture::producer_thread_func() VideoFormat video_format; video_format.width = width; video_format.height = height; + if (current_pixel_format == PixelFormat_10BitYCbCr) { + video_format.stride = (width + 5) / 6 * 4 * sizeof(uint32_t); + } else { + video_format.stride = width * 2; + } video_format.frame_rate_nom = fps; video_format.frame_rate_den = 1; video_format.has_signal = true; @@ -255,14 +280,26 @@ void FakeCapture::producer_thread_func() if (video_frame.data != nullptr) { assert(video_frame.size >= width * height * 2); if (video_frame.interleaved) { + assert(current_pixel_format == PixelFormat_8BitYCbCr); uint8_t cbcr[] = { cb, cr }; memset2(video_frame.data, cbcr, width * height / 2); memset(video_frame.data2, y, width * height); } else { - uint8_t ycbcr[] = { y, cb, y, cr }; - memset4(video_frame.data, ycbcr, width * height / 2); + if (current_pixel_format == PixelFormat_10BitYCbCr) { + // Just use the 8-bit-values shifted left by 2. + // It's not 100% correct, but it's close enough. + uint32_t pix[4]; + pix[0] = (cb << 2) | (y << 12) | (cr << 22); + pix[1] = (y << 2) | (cb << 12) | ( y << 22); + pix[2] = (cr << 2) | (y << 12) | (cb << 22); + pix[3] = (y << 2) | (cr << 12) | ( y << 22); + memset16(video_frame.data, pix, video_format.stride * height / sizeof(pix)); + } else { + uint8_t ycbcr[] = { y, cb, y, cr }; + memset4(video_frame.data, ycbcr, width * height / 2); + } } - video_frame.len = width * height * 2; + video_frame.len = video_format.stride * height; video_frame.received_timestamp = timestamp; } -- 2.39.2