X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bmusb.cpp;h=c2c49abb476a2057eaa491d6181ef9cb16398be5;hb=0bcf6eaf20da221bb122445e057a0934acae9f15;hp=c03b228c0d5d98fef78c56b256c96490a48ebf45;hpb=dfaf0700aefbcede0d058254f1f0a456ad46de03;p=bmusb diff --git a/bmusb.cpp b/bmusb.cpp index c03b228..c2c49ab 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -4,56 +4,91 @@ // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include -#include -#include -#include -#ifdef __SSE2__ +#ifdef __SSE4_1__ #include #endif +#include "bmusb.h" + #include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include -#include #include -#include -#include "bmusb.h" +#include using namespace std; using namespace std::placeholders; -#define WIDTH 1280 -#define HEIGHT 750 /* 30 lines ancillary data? */ -//#define WIDTH 1920 -//#define HEIGHT 1125 /* ??? lines ancillary data? */ +#define MIN_WIDTH 640 #define HEADER_SIZE 44 //#define HEADER_SIZE 0 #define AUDIO_HEADER_SIZE 4 -//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY -//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210 -#define FRAME_SIZE (8 << 20) +#define FRAME_SIZE (8 << 20) // 8 MB. +#define USB_VIDEO_TRANSFER_SIZE (128 << 10) // 128 kB. + +namespace { FILE *audiofp; thread usb_thread; atomic should_quit; +int find_xfer_size_for_width(int width) +{ + // Video seems to require isochronous packets scaled with the width; + // seemingly six lines is about right, rounded up to the required 1kB + // multiple. + int size = width * 2 * 6; + // Note that for 10-bit input, you'll need to increase size accordingly. 
+ //size = size * 4 / 3; + if (size % 1024 != 0) { + size &= ~1023; + size += 1024; + } + return size; +} + +void change_xfer_size_for_width(int width, libusb_transfer *xfr) +{ + assert(width >= MIN_WIDTH); + size_t size = find_xfer_size_for_width(width); + int num_iso_pack = xfr->length / size; + if (num_iso_pack != xfr->num_iso_packets || + size != xfr->iso_packet_desc[0].length) { + xfr->num_iso_packets = num_iso_pack; + libusb_set_iso_packet_lengths(xfr, size); + } +} + +} // namespace + FrameAllocator::~FrameAllocator() {} -#define NUM_QUEUED_FRAMES 8 +// Audio is more important than video, and also much cheaper. +// By having many more audio frames available, hopefully if something +// starts to drop, we'll have CPU load go down (from not having to +// process as much video) before we have to drop audio. +#define NUM_QUEUED_VIDEO_FRAMES 16 +#define NUM_QUEUED_AUDIO_FRAMES 64 + class MallocFrameAllocator : public FrameAllocator { public: - MallocFrameAllocator(size_t frame_size); + MallocFrameAllocator(size_t frame_size, size_t num_queued_frames); Frame alloc_frame() override; void release_frame(Frame frame) override; @@ -64,10 +99,10 @@ private: stack> freelist; // All of size . 
}; -MallocFrameAllocator::MallocFrameAllocator(size_t frame_size) +MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames) : frame_size(frame_size) { - for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) { + for (size_t i = 0; i < num_queued_frames; ++i) { freelist.push(unique_ptr(new uint8_t[frame_size])); } } @@ -91,6 +126,9 @@ FrameAllocator::Frame MallocFrameAllocator::alloc_frame() void MallocFrameAllocator::release_frame(Frame frame) { + if (frame.overflow > 0) { + printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow)); + } unique_lock lock(freelist_mutex); freelist.push(unique_ptr(frame.data)); } @@ -109,6 +147,7 @@ bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b) void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque *q) { + unique_lock lock(queue_lock); if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) { printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n", q->back().timecode, timecode); @@ -120,11 +159,7 @@ void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocato qf.format = format; qf.timecode = timecode; qf.frame = frame; - - { - unique_lock lock(queue_lock); - q->push_back(move(qf)); - } + q->push_back(move(qf)); queues_not_empty.notify_one(); // might be spurious } @@ -142,24 +177,35 @@ void dump_audio_block(uint8_t *audio_start, size_t audio_len) fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); } -void BMUSBCapture::dequeue_thread() +void BMUSBCapture::dequeue_thread_func() { - for ( ;; ) { + if (has_dequeue_callbacks) { + dequeue_init_callback(); + } + while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); - queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); }); + queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && 
!pending_audio_frames.empty()); }); + + if (dequeue_thread_should_quit) break; uint16_t video_timecode = pending_video_frames.front().timecode; uint16_t audio_timecode = pending_audio_frames.front().timecode; - if (video_timecode < audio_timecode) { + if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) { printf("Video block 0x%04x without corresponding audio block, dropping.\n", video_timecode); - video_frame_allocator->release_frame(pending_video_frames.front().frame); + QueuedFrame video_frame = pending_video_frames.front(); pending_video_frames.pop_front(); - } else if (audio_timecode < video_timecode) { - printf("Audio block 0x%04x without corresponding video block, dropping.\n", + lock.unlock(); + video_frame_allocator->release_frame(video_frame.frame); + } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) { + printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n", audio_timecode); - audio_frame_allocator->release_frame(pending_audio_frames.front().frame); + QueuedFrame audio_frame = pending_audio_frames.front(); pending_audio_frames.pop_front(); + lock.unlock(); + frame_callback(audio_timecode, + FrameAllocator::Frame(), 0, 0x0000, + audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); } else { QueuedFrame video_frame = pending_video_frames.front(); QueuedFrame audio_frame = pending_audio_frames.front(); @@ -179,6 +225,9 @@ void BMUSBCapture::dequeue_thread() audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); } } + if (has_dequeue_callbacks) { + dequeue_cleanup_callback(); + } } void BMUSBCapture::start_new_frame(const uint8_t *start) @@ -187,8 +236,31 @@ void BMUSBCapture::start_new_frame(const uint8_t *start) uint16_t timecode = (start[1] << 8) | start[0]; if (current_video_frame.len > 0) { + // If format is 0x0800 (no signal), add a fake (empty) audio + // frame to get it out of the queue. 
+ // TODO: Figure out if there are other formats that come with + // no audio, and treat them the same. + if (format == 0x0800) { + FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame(); + if (fake_audio_frame.data == nullptr) { + // Oh well, it's just a no-signal frame anyway. + printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n"); + current_video_frame.owner->release_frame(current_video_frame); + current_video_frame = video_frame_allocator->alloc_frame(); + return; + } + queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames); + } //dump_frame(); queue_frame(format, timecode, current_video_frame, &pending_video_frames); + + // Update the assumed frame width. We might be one frame too late on format changes, + // but it's much better than asking the user to choose manually. + int width, height, frame_rate_nom, frame_rate_den; + bool interlaced; + if (decode_video_format(format, &width, &height, &frame_rate_nom, &frame_rate_den, &interlaced)) { + assumed_frame_width = width; + } } //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n", // format, timecode, @@ -255,8 +327,13 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n int bytes = end - start; if (current_frame->len + bytes > current_frame->size) { - printf("%d bytes overflow after last %s frame\n", - int(current_frame->len + bytes - current_frame->size), frame_type_name); + current_frame->overflow = current_frame->len + bytes - current_frame->size; + current_frame->len = current_frame->size; + if (current_frame->overflow > 1048576) { + printf("%d bytes overflow after last %s frame\n", + int(current_frame->overflow), frame_type_name); + current_frame->overflow = 0; + } //dump_frame(); } else { if (current_frame->interleaved) { @@ -281,7 +358,50 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } } -#ifdef __SSE2__ +#ifdef __SSE4_1__ + 
+#if 0 +void avx2_dump(const char *name, __m256i n) +{ + printf("%-10s:", name); + printf(" %02x", _mm256_extract_epi8(n, 0)); + printf(" %02x", _mm256_extract_epi8(n, 1)); + printf(" %02x", _mm256_extract_epi8(n, 2)); + printf(" %02x", _mm256_extract_epi8(n, 3)); + printf(" %02x", _mm256_extract_epi8(n, 4)); + printf(" %02x", _mm256_extract_epi8(n, 5)); + printf(" %02x", _mm256_extract_epi8(n, 6)); + printf(" %02x", _mm256_extract_epi8(n, 7)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 8)); + printf(" %02x", _mm256_extract_epi8(n, 9)); + printf(" %02x", _mm256_extract_epi8(n, 10)); + printf(" %02x", _mm256_extract_epi8(n, 11)); + printf(" %02x", _mm256_extract_epi8(n, 12)); + printf(" %02x", _mm256_extract_epi8(n, 13)); + printf(" %02x", _mm256_extract_epi8(n, 14)); + printf(" %02x", _mm256_extract_epi8(n, 15)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 16)); + printf(" %02x", _mm256_extract_epi8(n, 17)); + printf(" %02x", _mm256_extract_epi8(n, 18)); + printf(" %02x", _mm256_extract_epi8(n, 19)); + printf(" %02x", _mm256_extract_epi8(n, 20)); + printf(" %02x", _mm256_extract_epi8(n, 21)); + printf(" %02x", _mm256_extract_epi8(n, 22)); + printf(" %02x", _mm256_extract_epi8(n, 23)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 24)); + printf(" %02x", _mm256_extract_epi8(n, 25)); + printf(" %02x", _mm256_extract_epi8(n, 26)); + printf(" %02x", _mm256_extract_epi8(n, 27)); + printf(" %02x", _mm256_extract_epi8(n, 28)); + printf(" %02x", _mm256_extract_epi8(n, 29)); + printf(" %02x", _mm256_extract_epi8(n, 30)); + printf(" %02x", _mm256_extract_epi8(n, 31)); + printf("\n"); +} +#endif // Does a memcpy and memchr in one to reduce processing time. 
// Note that the benefit is somewhat limited if your L3 cache is small, @@ -472,7 +592,7 @@ void decode_packs(const libusb_transfer *xfr, const uint8_t *start = xfr->buffer + offset; const uint8_t *limit = start + pack->actual_length; while (start < limit) { // Usually runs only one iteration. -#ifdef __SSE2__ +#ifdef __SSE4_1__ start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); if (start == limit) break; assert(start < limit); @@ -512,6 +632,9 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1)); } else { decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); + + // Update the transfer with the new assumed width, if we're in the process of changing formats. + change_xfer_size_for_width(usb->assumed_frame_width, xfr); } } if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { @@ -554,16 +677,15 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) } #endif - if (libusb_submit_transfer(xfr) < 0) { - fprintf(stderr, "error re-submitting URB\n"); + int rc = libusb_submit_transfer(xfr); + if (rc < 0) { + fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc)); exit(1); } } void BMUSBCapture::usb_thread_func() { - printf("usb thread started\n"); - sched_param param; memset(¶m, 0, sizeof(param)); param.sched_priority = 1; @@ -577,15 +699,90 @@ void BMUSBCapture::usb_thread_func() } } +struct USBCardDevice { + uint16_t product; + uint8_t bus, port; + libusb_device *device; +}; + +libusb_device_handle *open_card(int card_index) +{ + libusb_device **devices; + ssize_t num_devices = libusb_get_device_list(nullptr, &devices); + if (num_devices == -1) { + fprintf(stderr, "Error finding USB devices\n"); + exit(1); + } + vector found_cards; + for (ssize_t i = 0; i < num_devices; ++i) { + libusb_device_descriptor desc; + if 
(libusb_get_device_descriptor(devices[i], &desc) < 0) { + fprintf(stderr, "Error getting device descriptor for device %d\n", int(i)); + exit(1); + } + + uint8_t bus = libusb_get_bus_number(devices[i]); + uint8_t port = libusb_get_port_number(devices[i]); + + if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) && + !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) { + libusb_unref_device(devices[i]); + continue; + } + + found_cards.push_back({ desc.idProduct, bus, port, devices[i] }); + } + libusb_free_device_list(devices, 0); + + // Sort the devices to get a consistent ordering. + sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) { + if (a.product != b.product) + return a.product < b.product; + if (a.bus != b.bus) + return a.bus < b.bus; + return a.port < b.port; + }); + + for (size_t i = 0; i < found_cards.size(); ++i) { + fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port); + if (found_cards[i].product == 0xbd3b) { + fprintf(stderr, "Intensity Shuttle\n"); + } else if (found_cards[i].product == 0xbd4f) { + fprintf(stderr, "UltraStudio SDI\n"); + } else { + assert(false); + } + } + + if (size_t(card_index) >= found_cards.size()) { + fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size())); + exit(1); + } + + libusb_device_handle *devh; + int rc = libusb_open(found_cards[card_index].device, &devh); + if (rc < 0) { + fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc)); + exit(1); + } + + for (size_t i = 0; i < found_cards.size(); ++i) { + libusb_unref_device(found_cards[i].device); + } + + return devh; +} + void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { - set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak. + set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak. 
} if (audio_frame_allocator == nullptr) { - set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak. + set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak. } - thread(&BMUSBCapture::dequeue_thread, this).detach(); + dequeue_thread_should_quit = false; + dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this); int rc; struct libusb_transfer *xfr; @@ -596,9 +793,7 @@ void BMUSBCapture::configure_card() exit(1); } - //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); - //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f); - struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid); + libusb_device_handle *devh = open_card(card_index); if (!devh) { fprintf(stderr, "Error finding USB device\n"); exit(1); @@ -639,6 +834,9 @@ void BMUSBCapture::configure_card() // Alternate setting 1 is output, alternate setting 2 is input. // Card is reset when switching alternates, so the driver uses // this “double switch” when it wants to reset. + // + // There's also alternate settings 3 and 4, which seem to be + // like 1 and 2 except they advertise less bandwidth needed. rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); if (rc < 0) { fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); @@ -646,7 +844,7 @@ void BMUSBCapture::configure_card() } rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2); if (rc < 0) { - fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc)); exit(1); } #if 0 @@ -819,28 +1017,24 @@ void BMUSBCapture::configure_card() // set up isochronous transfers for audio and video for (int e = 3; e <= 4; ++e) { //int num_transfers = (e == 3) ? 
6 : 6; - int num_transfers = 6; + int num_transfers = 10; for (int i = 0; i < num_transfers; ++i) { + size_t buf_size; int num_iso_pack, size; if (e == 3) { - // Video seems to require isochronous packets scaled with the width; - // seemingly six lines is about right, rounded up to the required 1kB - // multiple. - size = WIDTH * 2 * 6; - // Note that for 10-bit input, you'll need to increase size accordingly. - //size = size * 4 / 3; - if (size % 1024 != 0) { - size &= ~1023; - size += 1024; - } - num_iso_pack = (2 << 18) / size; // 512 kB. - printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size); + // Allocate for minimum width (because that will give us the most + // number of packets, so we don't need to reallocated, but we'll + // default to 720p for the first frame. + size = find_xfer_size_for_width(MIN_WIDTH); + num_iso_pack = USB_VIDEO_TRANSFER_SIZE / size; + buf_size = USB_VIDEO_TRANSFER_SIZE; } else { size = 0xc0; num_iso_pack = 80; + buf_size = num_iso_pack * size; } - int num_bytes = num_iso_pack * size; - uint8_t *buf = new uint8_t[num_bytes]; + assert(size_t(num_iso_pack * size) <= buf_size); + uint8_t *buf = new uint8_t[buf_size]; xfr = libusb_alloc_transfer(num_iso_pack); if (!xfr) { @@ -849,10 +1043,15 @@ void BMUSBCapture::configure_card() } int ep = LIBUSB_ENDPOINT_IN | e; - libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes, + libusb_fill_iso_transfer(xfr, devh, ep, buf, buf_size, num_iso_pack, cb_xfr, nullptr, 0); libusb_set_iso_packet_lengths(xfr, size); xfr->user_data = this; + + if (e == 3) { + change_xfer_size_for_width(assumed_frame_width, xfr); + } + iso_xfrs.push_back(xfr); } } @@ -885,6 +1084,13 @@ out: #endif } +void BMUSBCapture::stop_dequeue_thread() +{ + dequeue_thread_should_quit = true; + queues_not_empty.notify_all(); + dequeue_thread.join(); +} + void BMUSBCapture::start_bm_thread() { should_quit = false; @@ -896,3 +1102,90 @@ void BMUSBCapture::stop_bm_thread() should_quit = true; usb_thread.join(); } + 
// One row of the lookup table used by decode_video_format().
struct VideoFormatEntry {
	// Format word with the 0xe808 bits cleared (see decode_video_format()).
	uint16_t normalized_video_format;
	int width, height;
	// Frame rate as the fraction frame_rate_nom / frame_rate_den.
	int frame_rate_nom, frame_rate_den;
	bool interlaced;
};

// Translates the 16-bit video format word from a frame header into
// resolution, frame rate and interlacing information.
//
// All output pointers must be non-null; every one of them is always written.
//
// Returns true if the format was recognized (including the special
// "no signal" format 0x0800). Returns false if the word does not look like
// a video format at all (width/height are then set to 0 and the rate to
// 60/1), or if it looks like one but is not in the table (outputs are then
// set to a 720p60 guess).
bool decode_video_format(uint16_t video_format, int *width, int *height, int *frame_rate_nom, int *frame_rate_den, bool *interlaced)
{
	*interlaced = false;

	if (video_format == 0x0800) {
		// No video signal. These green pseudo-frames seem to come at about 30.13 Hz.
		// It's a strange thing, but what can you do.
		*width = 720;
		*height = 525;
		*frame_rate_nom = 3013;
		*frame_rate_den = 100;
		return true;
	}
	if ((video_format & 0xe800) != 0xe800) {
		printf("Video format 0x%04x does not appear to be a video format. Assuming 60 Hz.\n",
			video_format);
		*width = 0;
		*height = 0;
		*frame_rate_nom = 60;
		*frame_rate_den = 1;
		return false;
	}

	// NTSC (480i59.94, I suppose). A special case, see below.
	if (video_format == 0xe901 || video_format == 0xe9c1 || video_format == 0xe801) {
		*width = 640;
		*height = 480;
		*frame_rate_nom = 60000;
		*frame_rate_den = 1001;
		*interlaced = true;
		return true;
	}

	// PAL (576i50, I suppose). A special case, see below.
	if (video_format == 0xe909) {
		*width = 720;
		*height = 576;
		*frame_rate_nom = 50;
		*frame_rate_den = 1;
		*interlaced = true;
		return true;
	}

	// 0x8 seems to be a flag about availability of deep color on the input,
	// except when it's not (e.g. it's the only difference between NTSC
	// and PAL). Rather confusing. But we clear it here nevertheless, because
	// usually it doesn't mean anything.
	//
	// Since bit 0x0008 is always cleared in the lookup value, no key in the
	// table below may have that bit set; such an entry could never match.
	const uint16_t normalized_video_format = static_cast<uint16_t>(video_format & ~0xe808);
	static constexpr VideoFormatEntry entries[] = {
		{ 0x0143, 1280,  720,    50,    1, false },  // 720p50.
		{ 0x0103, 1280,  720,    60,    1, false },  // 720p60.
		{ 0x0121, 1280,  720, 60000, 1001, false },  // 720p59.94.
		{ 0x01c3, 1920, 1080,    30,    1, false },  // 1080p30.
		{ 0x0003, 1920, 1080,    30,    1,  true },  // 1080i60.
		{ 0x01e1, 1920, 1080, 30000, 1001, false },  // 1080p29.97.
		{ 0x0021, 1920, 1080, 30000, 1001,  true },  // 1080i59.94.
		{ 0x0063, 1920, 1080,    25,    1, false },  // 1080p25.
		{ 0x0043, 1920, 1080,    25,    1,  true },  // 1080i50.
		// Was 0x008e, which has bit 0x0008 set and thus could never match
		// the normalized value. 0x0083 follows the pattern of every other
		// integer/fractional rate pair in this table (fractional = integer
		// + 0x1e, cf. 0x00a1 below). NOTE(review): confirm against hardware.
		{ 0x0083, 1920, 1080,    24,    1, false },  // 1080p24.
		{ 0x00a1, 1920, 1080, 24000, 1001, false },  // 1080p23.98.
	};
	for (const VideoFormatEntry &entry : entries) {
		if (normalized_video_format == entry.normalized_video_format) {
			*width = entry.width;
			*height = entry.height;
			*frame_rate_nom = entry.frame_rate_nom;
			*frame_rate_den = entry.frame_rate_den;
			*interlaced = entry.interlaced;
			return true;
		}
	}

	printf("Unknown video format 0x%04x. Assuming 720p60.\n", video_format);
	*width = 1280;
	*height = 720;
	*frame_rate_nom = 60;
	*frame_rate_den = 1;
	return false;
}