X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=inline;f=bmusb.cpp;h=ac02852cb2d4af37fd64f5a7d2454cc1d54a9f98;hb=f38bf2bef09e52ba1e15678f5688132f282b50c2;hp=1b4628e378c18796e192db4cd260ab92584e0b8f;hpb=03d475289549453b606c1c04d21e2929abef1e90;p=bmusb diff --git a/bmusb.cpp b/bmusb.cpp index 1b4628e..ac02852 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -4,29 +4,31 @@ // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include -#include -#include -#include -#ifdef __SSE2__ +#ifdef __SSE4_1__ #include #endif +#include "bmusb.h" + #include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include -#include #include -#include -#include "bmusb.h" +#include using namespace std; using namespace std::placeholders; @@ -45,9 +47,12 @@ using namespace std::placeholders; FILE *audiofp; +thread usb_thread; +atomic should_quit; + FrameAllocator::~FrameAllocator() {} -#define NUM_QUEUED_FRAMES 8 +#define NUM_QUEUED_FRAMES 16 class MallocFrameAllocator : public FrameAllocator { public: MallocFrameAllocator(size_t frame_size); @@ -88,6 +93,9 @@ FrameAllocator::Frame MallocFrameAllocator::alloc_frame() void MallocFrameAllocator::release_frame(Frame frame) { + if (frame.overflow > 0) { + printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow)); + } unique_lock lock(freelist_mutex); freelist.push(unique_ptr(frame.data)); } @@ -139,11 +147,14 @@ void dump_audio_block(uint8_t *audio_start, size_t audio_len) fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); } -void BMUSBCapture::dequeue_thread() +void BMUSBCapture::dequeue_thread_func() { - for ( ;; ) { + if (has_dequeue_callbacks) { + dequeue_init_callback(); + } + while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); - queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); }); + queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); }); uint16_t video_timecode = pending_video_frames.front().timecode; uint16_t audio_timecode = pending_audio_frames.front().timecode; @@ -176,6 +187,9 @@ void BMUSBCapture::dequeue_thread() audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); } } + if (has_dequeue_callbacks) { + dequeue_cleanup_callback(); + } } void BMUSBCapture::start_new_frame(const uint8_t *start) @@ -252,8 +266,13 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n int bytes = end - start; if (current_frame->len + bytes > current_frame->size) { - printf("%d bytes overflow after last %s frame\n", - int(current_frame->len + bytes - current_frame->size), frame_type_name); + current_frame->overflow = current_frame->len + bytes - current_frame->size; + current_frame->len = current_frame->size; + if (current_frame->overflow > 1048576) { + printf("%d bytes overflow after last %s frame\n", + int(current_frame->overflow), frame_type_name); + current_frame->overflow = 0; + } //dump_frame(); } else { if (current_frame->interleaved) { @@ -278,7 +297,50 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n } } -#ifdef __SSE2__ +#ifdef __SSE4_1__ + +#if 0 +void avx2_dump(const char *name, __m256i n) +{ + printf("%-10s:", name); + printf(" %02x", _mm256_extract_epi8(n, 0)); + printf(" %02x", _mm256_extract_epi8(n, 1)); + printf(" %02x", _mm256_extract_epi8(n, 2)); + printf(" %02x", _mm256_extract_epi8(n, 3)); + printf(" %02x", _mm256_extract_epi8(n, 4)); + printf(" %02x", _mm256_extract_epi8(n, 5)); + printf(" %02x", _mm256_extract_epi8(n, 6)); + printf(" %02x", _mm256_extract_epi8(n, 7)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 8)); + printf(" %02x", _mm256_extract_epi8(n, 9)); + printf(" %02x", _mm256_extract_epi8(n, 10)); + printf(" %02x", _mm256_extract_epi8(n, 11)); + printf(" %02x", _mm256_extract_epi8(n, 12)); + printf(" %02x", _mm256_extract_epi8(n, 13)); + printf(" %02x", _mm256_extract_epi8(n, 14)); + printf(" %02x", _mm256_extract_epi8(n, 15)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 16)); + printf(" %02x", _mm256_extract_epi8(n, 17)); + printf(" %02x", _mm256_extract_epi8(n, 18)); + printf(" %02x", _mm256_extract_epi8(n, 19)); + printf(" %02x", _mm256_extract_epi8(n, 20)); + printf(" %02x", _mm256_extract_epi8(n, 21)); + printf(" %02x", _mm256_extract_epi8(n, 22)); + printf(" %02x", _mm256_extract_epi8(n, 23)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 24)); + printf(" %02x", _mm256_extract_epi8(n, 25)); + printf(" %02x", _mm256_extract_epi8(n, 26)); + printf(" %02x", _mm256_extract_epi8(n, 27)); + printf(" %02x", _mm256_extract_epi8(n, 28)); + printf(" %02x", _mm256_extract_epi8(n, 29)); + printf(" %02x", _mm256_extract_epi8(n, 30)); + printf(" %02x", _mm256_extract_epi8(n, 31)); + printf("\n"); +} +#endif // Does a memcpy and memchr in one to reduce processing time. // Note that the benefit is somewhat limited if your L3 cache is small, @@ -469,7 +531,7 @@ void decode_packs(const libusb_transfer *xfr, const uint8_t *start = xfr->buffer + offset; const uint8_t *limit = start + pack->actual_length; while (start < limit) { // Usually runs only one iteration. -#ifdef __SSE2__ +#ifdef __SSE4_1__ start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); if (start == limit) break; assert(start < limit); @@ -559,8 +621,6 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) void BMUSBCapture::usb_thread_func() { - printf("usb thread started\n"); - sched_param param; memset(¶m, 0, sizeof(param)); param.sched_priority = 1; @@ -574,7 +634,7 @@ void BMUSBCapture::usb_thread_func() } } -void BMUSBCapture::start_bm_capture() +void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak. @@ -582,11 +642,11 @@ void BMUSBCapture::start_bm_capture() if (audio_frame_allocator == nullptr) { set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak. } - thread(&BMUSBCapture::dequeue_thread, this).detach(); + dequeue_thread_should_quit = false; + dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this); int rc; struct libusb_transfer *xfr; - vector iso_xfrs; rc = libusb_init(nullptr); if (rc < 0) { @@ -594,7 +654,9 @@ void BMUSBCapture::start_bm_capture() exit(1); } - struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f); + struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid); if (!devh) { fprintf(stderr, "Error finding USB device\n"); exit(1); @@ -635,6 +697,9 @@ void BMUSBCapture::start_bm_capture() // Alternate setting 1 is output, alternate setting 2 is input. // Card is reset when switching alternates, so the driver uses // this “double switch” when it wants to reset. + // + // There's also alternate settings 3 and 4, which seem to be + // like 1 and 2 except they advertise less bandwidth needed. rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); if (rc < 0) { fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); @@ -717,59 +782,6 @@ void BMUSBCapture::start_bm_capture() static const ctrl ctrls[] = { { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 4, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 20, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 28, 0 }, - { LIBUSB_ENDPOINT_IN, 215, 32, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 36, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 216, 44, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 48, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 52, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, // packet 354 - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - // more... - //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, // wow, some kind of mode // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit // capture (v210). @@ -779,46 +791,10 @@ void BMUSBCapture::start_bm_capture() // 0x3c000000 = composite video? (analog audio) // 0x3e000000 = s-video? (analog audio) { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, - - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff }, - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff }, - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a }, { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start? - //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 }, // latch for frame start? { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, // - //{ LIBUSB_ENDPOINT_OUT, 215, 4, 0x00000000 }, // appears to have no e fect - //{ LIBUSB_ENDPOINT_OUT, 215, 8, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 }, // appears to have no effect -#if 0 - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 28 }, - { LIBUSB_ENDPOINT_OUT, 215, 32 }, - { LIBUSB_ENDPOINT_OUT, 215, 36 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, -#endif }; for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) { @@ -918,7 +894,7 @@ void BMUSBCapture::start_bm_capture() size &= ~1023; size += 1024; } - num_iso_pack = (2 << 20) / size; // 2 MB. + num_iso_pack = (2 << 18) / size; // 512 kB. printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size); } else { size = 0xc0; @@ -941,24 +917,24 @@ void BMUSBCapture::start_bm_capture() iso_xfrs.push_back(xfr); } } +} - { - int i = 0; - for (libusb_transfer *xfr : iso_xfrs) { - rc = libusb_submit_transfer(xfr); - ++i; - if (rc < 0) { - //printf("num_bytes=%d\n", num_bytes); - fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", - xfr->endpoint, i, libusb_error_name(rc)); - exit(1); - } +void BMUSBCapture::start_bm_capture() +{ + printf("starting capture\n"); + int i = 0; + for (libusb_transfer *xfr : iso_xfrs) { + printf("submitting transfer...\n"); + int rc = libusb_submit_transfer(xfr); + ++i; + if (rc < 0) { + //printf("num_bytes=%d\n", num_bytes); + fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", + xfr->endpoint, i, libusb_error_name(rc)); + exit(1); } } - should_quit = false; - usb_thread = thread(&BMUSBCapture::usb_thread_func, this); - #if 0 libusb_release_interface(devh, 0); @@ -970,7 +946,20 @@ out: #endif } -void BMUSBCapture::stop_bm_capture() +void BMUSBCapture::stop_dequeue_thread() +{ + dequeue_thread_should_quit = true; + queues_not_empty.notify_all(); + dequeue_thread.join(); +} + +void BMUSBCapture::start_bm_thread() +{ + should_quit = false; + usb_thread = thread(&BMUSBCapture::usb_thread_func); +} + +void BMUSBCapture::stop_bm_thread() { should_quit = true; usb_thread.join();