X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=bmusb.cpp;h=6867805e719e4c6983af99063f343cbb2dd94b1f;hb=76d42863dafcc8ab3b60df73c0caae9f98545a2a;hp=4b5f75a6fb95f232293a0161d2db52cf4c0ceb85;hpb=1339024634446f28d169c3010fede7227318dd62;p=bmusb diff --git a/bmusb.cpp b/bmusb.cpp index 4b5f75a..6867805 100644 --- a/bmusb.cpp +++ b/bmusb.cpp @@ -4,31 +4,34 @@ // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) // Audio comes out as 8-channel 24-bit raw audio. +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include -#include -#include +#ifdef __SSE4_1__ +#include +#endif +#include "bmusb.h" + #include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include -#include #include -#include "bmusb.h" +#include using namespace std; - -static int current_register = 0; - -#define NUM_REGISTERS 60 -uint8_t register_file[NUM_REGISTERS]; +using namespace std::placeholders; #define WIDTH 1280 #define HEIGHT 750 /* 30 lines ancillary data? */ @@ -44,23 +47,12 @@ uint8_t register_file[NUM_REGISTERS]; FILE *audiofp; -FrameAllocator::Frame current_video_frame; -FrameAllocator::Frame current_audio_frame; - -struct QueuedFrame { - uint16_t timecode; - uint16_t format; - FrameAllocator::Frame frame; -}; - -mutex queue_lock; -condition_variable queues_not_empty; -deque pending_video_frames; -deque pending_audio_frames; +thread usb_thread; +atomic should_quit; FrameAllocator::~FrameAllocator() {} -#define NUM_QUEUED_FRAMES 8 +#define NUM_QUEUED_FRAMES 16 class MallocFrameAllocator : public FrameAllocator { public: MallocFrameAllocator(size_t frame_size); @@ -105,10 +97,6 @@ void MallocFrameAllocator::release_frame(Frame frame) freelist.push(unique_ptr(frame.data)); } -FrameAllocator *video_frame_allocator = nullptr; -FrameAllocator *audio_frame_allocator = nullptr; -frame_callback_t frame_callback = nullptr; - bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b) { if (a == b) { @@ -121,7 +109,7 @@ bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b) } } -void queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque *q) +void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque *q) { if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) { printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n", @@ -156,11 +144,14 @@ void dump_audio_block(uint8_t *audio_start, size_t audio_len) fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); } -void dequeue_thread() +void BMUSBCapture::dequeue_thread_func() { - for ( ;; ) { + if (has_dequeue_callbacks) { + dequeue_init_callback(); + } + while (!dequeue_thread_should_quit) { unique_lock lock(queue_lock); - queues_not_empty.wait(lock, []{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); }); + queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); }); uint16_t video_timecode = pending_video_frames.front().timecode; uint16_t audio_timecode = pending_audio_frames.front().timecode; @@ -193,25 +184,12 @@ void dequeue_thread() audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); } } -} - -void add_current_frame(const uint8_t *start, const uint8_t *end) -{ - if (current_video_frame.data == nullptr || - current_video_frame.len > current_video_frame.size) return; - if (start == end) return; - - int bytes = end - start; - if (current_video_frame.len + bytes > current_video_frame.size) { - printf("%d bytes overflow after last video frame\n", current_video_frame.len + bytes - current_video_frame.size); - //dump_frame(); - } else { - memcpy(current_video_frame.data + current_video_frame.len, start, bytes); - current_video_frame.len += bytes; + if (has_dequeue_callbacks) { + dequeue_cleanup_callback(); } } -void start_new_frame(const uint8_t *start) +void BMUSBCapture::start_new_frame(const uint8_t *start) { uint16_t format = (start[3] << 8) | start[2]; uint16_t timecode = (start[1] << 8) | start[0]; @@ -233,23 +211,7 @@ void start_new_frame(const uint8_t *start) //} } -void add_current_audio(const uint8_t *start, const uint8_t *end) -{ - if (current_audio_frame.data == nullptr || - current_audio_frame.len > current_audio_frame.size) return; - if (start == end) return; - - int bytes = end - start; - if (current_audio_frame.len + bytes > current_audio_frame.size) { - printf("%d bytes overflow after last audio block\n", current_audio_frame.len + bytes - current_audio_frame.size); - //dump_audio_block(); - } else { - memcpy(current_audio_frame.data + current_audio_frame.len, start, bytes); - current_audio_frame.len += bytes; - } -} - -void start_new_audio_block(const uint8_t *start) +void BMUSBCapture::start_new_audio_block(const uint8_t *start) { uint16_t format = (start[3] << 8) | start[2]; uint16_t timecode = (start[1] << 8) | start[0]; @@ -262,10 +224,11 @@ void start_new_audio_block(const uint8_t *start) current_audio_frame = audio_frame_allocator->alloc_frame(); } +#if 0 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack) { // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset); - for (int j = 0; j < pack->actual_length; j++) { + for (unsigned j = 0; j < pack->actual_length; j++) { //for (int j = 0; j < min(pack->actual_length, 16u); j++) { printf("%02x", xfr->buffer[j + offset]); if ((j % 16) == 15) @@ -276,11 +239,279 @@ static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_p printf(" "); } } +#endif -void decode_packs(const libusb_transfer *xfr, const char *sync_pattern, int sync_length, function add_callback, function start_callback) +void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n) +{ + assert(n % 2 == 0); + uint8_t *dptr1 = dest1; + uint8_t *dptr2 = dest2; + + for (size_t i = 0; i < n; i += 2) { + *dptr1++ = *src++; + *dptr2++ = *src++; + } +} + +void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == end) { + return; + } + + int bytes = end - start; + if (current_frame->len + bytes > current_frame->size) { + printf("%d bytes overflow after last %s frame\n", + int(current_frame->len + bytes - current_frame->size), frame_type_name); + //dump_frame(); + } else { + if (current_frame->interleaved) { + uint8_t *data = current_frame->data + current_frame->len / 2; + uint8_t *data2 = current_frame->data2 + current_frame->len / 2; + if (current_frame->len % 2 == 1) { + ++data; + swap(data, data2); + } + if (bytes % 2 == 1) { + *data++ = *start++; + swap(data, data2); + ++current_frame->len; + --bytes; + } + memcpy_interleaved(data, data2, start, bytes); + current_frame->len += bytes; + } else { + memcpy(current_frame->data + current_frame->len, start, bytes); + current_frame->len += bytes; + } + } +} + +#ifdef __SSE4_1__ + +#if 0 +void avx2_dump(const char *name, __m256i n) +{ + printf("%-10s:", name); + printf(" %02x", _mm256_extract_epi8(n, 0)); + printf(" %02x", _mm256_extract_epi8(n, 1)); + printf(" %02x", _mm256_extract_epi8(n, 2)); + printf(" %02x", _mm256_extract_epi8(n, 3)); + printf(" %02x", _mm256_extract_epi8(n, 4)); + printf(" %02x", _mm256_extract_epi8(n, 5)); + printf(" %02x", _mm256_extract_epi8(n, 6)); + printf(" %02x", _mm256_extract_epi8(n, 7)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 8)); + printf(" %02x", _mm256_extract_epi8(n, 9)); + printf(" %02x", _mm256_extract_epi8(n, 10)); + printf(" %02x", _mm256_extract_epi8(n, 11)); + printf(" %02x", _mm256_extract_epi8(n, 12)); + printf(" %02x", _mm256_extract_epi8(n, 13)); + printf(" %02x", _mm256_extract_epi8(n, 14)); + printf(" %02x", _mm256_extract_epi8(n, 15)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 16)); + printf(" %02x", _mm256_extract_epi8(n, 17)); + printf(" %02x", _mm256_extract_epi8(n, 18)); + printf(" %02x", _mm256_extract_epi8(n, 19)); + printf(" %02x", _mm256_extract_epi8(n, 20)); + printf(" %02x", _mm256_extract_epi8(n, 21)); + printf(" %02x", _mm256_extract_epi8(n, 22)); + printf(" %02x", _mm256_extract_epi8(n, 23)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 24)); + printf(" %02x", _mm256_extract_epi8(n, 25)); + printf(" %02x", _mm256_extract_epi8(n, 26)); + printf(" %02x", _mm256_extract_epi8(n, 27)); + printf(" %02x", _mm256_extract_epi8(n, 28)); + printf(" %02x", _mm256_extract_epi8(n, 29)); + printf(" %02x", _mm256_extract_epi8(n, 30)); + printf(" %02x", _mm256_extract_epi8(n, 31)); + printf("\n"); +} +#endif + +// Does a memcpy and memchr in one to reduce processing time. +// Note that the benefit is somewhat limited if your L3 cache is small, +// as you'll (unfortunately) spend most of the time loading the data +// from main memory. +// +// Complicated cases are left to the slow path; it basically stops copying +// up until the first instance of "sync_char" (usually a bit before, actually). +// This is fine, since 0x00 bytes shouldn't really show up in normal picture +// data, and what we really need this for is the 00 00 ff ff marker in video data. +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == limit) { + return start; + } + size_t orig_bytes = limit - start; + if (orig_bytes < 128) { + // Don't bother. + return start; + } + + // Don't read more bytes than we can write. + limit = min(limit, start + (current_frame->size - current_frame->len)); + + // Align end to 32 bytes. + limit = (const uint8_t *)(intptr_t(limit) & ~31); + + if (start >= limit) { + return start; + } + + // Process [0,31] bytes, such that start gets aligned to 32 bytes. + const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31); + if (aligned_start != start) { + const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start); + if (sync_start == nullptr) { + add_to_frame(current_frame, "", start, aligned_start); + } else { + add_to_frame(current_frame, "", start, sync_start); + return sync_start; + } + } + + // Make the length a multiple of 64. + if (current_frame->interleaved) { + if (((limit - aligned_start) % 64) != 0) { + limit -= 32; + } + assert(((limit - aligned_start) % 64) == 0); + } + +#if __AVX2__ + const __m256i needle = _mm256_set1_epi8(sync_char); + + const __restrict __m256i *in = (const __m256i *)aligned_start; + if (current_frame->interleaved) { + __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); + __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m256i shuffle_cw = _mm256_set_epi8( + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0, + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + while (in < (const __m256i *)limit) { + // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128). + __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh + __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp + + __m256i found1 = _mm256_cmpeq_epi8(data1, needle); + __m256i found2 = _mm256_cmpeq_epi8(data2, needle); + __m256i found = _mm256_or_si256(found1, found2); + + data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh + data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop + + data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh + data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop + + __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000); + __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001); + + _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used. + _mm256_storeu_si256(out2, hi); + + if (!_mm256_testz_si256(found, found)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + while (in < (const __m256i *)limit) { + __m256i data = _mm256_load_si256(in); + _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. + __m256i found = _mm256_cmpeq_epi8(data, needle); + if (!_mm256_testz_si256(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#else + const __m128i needle = _mm_set1_epi8(sync_char); + + const __m128i *in = (const __m128i *)aligned_start; + if (current_frame->interleaved) { + __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); + __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m128i mask_lower_byte = _mm_set1_epi16(0x00ff); + while (in < (const __m128i *)limit) { + __m128i data1 = _mm_load_si128(in); + __m128i data2 = _mm_load_si128(in + 1); + __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte); + __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte); + __m128i data1_hi = _mm_srli_epi16(data1, 8); + __m128i data2_hi = _mm_srli_epi16(data2, 8); + __m128i lo = _mm_packus_epi16(data1_lo, data2_lo); + _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used. + __m128i hi = _mm_packus_epi16(data1_hi, data2_hi); + _mm_storeu_si128(out2, hi); + __m128i found1 = _mm_cmpeq_epi8(data1, needle); + __m128i found2 = _mm_cmpeq_epi8(data2, needle); + if (!_mm_testz_si128(found1, found1) || + !_mm_testz_si128(found2, found2)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + while (in < (const __m128i *)limit) { + __m128i data = _mm_load_si128(in); + _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. + __m128i found = _mm_cmpeq_epi8(data, needle); + if (!_mm_testz_si128(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#endif + + //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); + + return (const uint8_t *)in; +} +#endif + +void decode_packs(const libusb_transfer *xfr, + const char *sync_pattern, + int sync_length, + FrameAllocator::Frame *current_frame, + const char *frame_type_name, + function start_callback) { int offset = 0; - for (unsigned i = 0; i < xfr->num_iso_packets; i++) { + for (int i = 0; i < xfr->num_iso_packets; i++) { const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i]; if (pack->status != LIBUSB_TRANSFER_COMPLETED) { @@ -289,19 +520,24 @@ void decode_packs(const libusb_transfer *xfr, const char *sync_pattern, int sync //exit(5); } - const unsigned char *iso_start = xfr->buffer + offset; - for (int iso_offset = 0; iso_offset < pack->actual_length; ) { // Usually runs only one iteration. - const unsigned char* start_next_frame = (const unsigned char *)memmem(iso_start + iso_offset, pack->actual_length - iso_offset, sync_pattern, sync_length); + const uint8_t *start = xfr->buffer + offset; + const uint8_t *limit = start + pack->actual_length; + while (start < limit) { // Usually runs only one iteration. +#ifdef __SSE4_1__ + start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); + if (start == limit) break; + assert(start < limit); +#endif + + const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length); if (start_next_frame == nullptr) { // add the rest of the buffer - add_callback(iso_start + iso_offset, iso_start + pack->actual_length); + add_to_frame(current_frame, frame_type_name, start, limit); break; } else { - add_callback(iso_start + iso_offset, start_next_frame); - start_callback(start_next_frame + sync_length); - - int suboffset = start_next_frame - iso_start; - iso_offset = suboffset + sync_length; // skip sync + add_to_frame(current_frame, frame_type_name, start, start_next_frame); + start = start_next_frame + sync_length; // skip sync + start_callback(start); } } #if 0 @@ -311,7 +547,7 @@ void decode_packs(const libusb_transfer *xfr, const char *sync_pattern, int sync } } -static void cb_xfr(struct libusb_transfer *xfr) +void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) { if (xfr->status != LIBUSB_TRANSFER_COMPLETED) { fprintf(stderr, "transfer status %d\n", xfr->status); @@ -319,15 +555,18 @@ static void cb_xfr(struct libusb_transfer *xfr) exit(3); } + assert(xfr->user_data != nullptr); + BMUSBCapture *usb = static_cast(xfr->user_data); + if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) { if (xfr->endpoint == 0x84) { - decode_packs(xfr, "DeckLinkAudioResyncT", 20, add_current_audio, start_new_audio_block); + decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1)); } else { - decode_packs(xfr, "\x00\x00\xff\xff", 4, add_current_frame, start_new_frame); + decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); } } if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { - const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr); + //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr); uint8_t *buf = libusb_control_transfer_get_data(xfr); #if 0 if (setup->wIndex == 44) { @@ -337,19 +576,19 @@ static void cb_xfr(struct libusb_transfer *xfr) setup->wIndex, buf[0], buf[1], buf[2], buf[3]); } #else - memcpy(register_file + current_register, buf, 4); - current_register = (current_register + 4) % NUM_REGISTERS; - if (current_register == 0) { + memcpy(usb->register_file + usb->current_register, buf, 4); + usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS; + if (usb->current_register == 0) { // read through all of them printf("register dump:"); - for (int i = 0; i < NUM_REGISTERS; i += 4) { - printf(" 0x%02x%02x%02x%02x", register_file[i], register_file[i + 1], register_file[i + 2], register_file[i + 3]); + for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) { + printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]); } printf("\n"); } libusb_fill_control_setup(xfr->buffer, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, - /*index=*/current_register, /*length=*/4); + /*index=*/usb->current_register, /*length=*/4); #endif } @@ -366,56 +605,28 @@ static void cb_xfr(struct libusb_transfer *xfr) } #endif -end: if (libusb_submit_transfer(xfr) < 0) { fprintf(stderr, "error re-submitting URB\n"); exit(1); } } -void usb_thread() +void BMUSBCapture::usb_thread_func() { - printf("usb thread started\n"); - sched_param param; memset(¶m, 0, sizeof(param)); param.sched_priority = 1; if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) { printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); } - while (true) { + while (!should_quit) { int rc = libusb_handle_events(nullptr); if (rc != LIBUSB_SUCCESS) break; } } -FrameAllocator *get_video_frame_allocator() -{ - return video_frame_allocator; -} - -void set_video_frame_allocator(FrameAllocator *allocator) -{ - video_frame_allocator = allocator; -} - -FrameAllocator *get_audio_frame_allocator() -{ - return audio_frame_allocator; -} - -void set_audio_frame_allocator(FrameAllocator *allocator) -{ - audio_frame_allocator = allocator; -} - -void set_frame_callback(frame_callback_t callback) -{ - frame_callback = callback; -} - -void start_bm_capture() +void BMUSBCapture::configure_card() { if (video_frame_allocator == nullptr) { set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak. @@ -423,11 +634,11 @@ void start_bm_capture() if (audio_frame_allocator == nullptr) { set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak. } - thread(dequeue_thread).detach(); + dequeue_thread_should_quit = false; + dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this); int rc; struct libusb_transfer *xfr; - vector iso_xfrs; rc = libusb_init(nullptr); if (rc < 0) { @@ -435,7 +646,9 @@ void start_bm_capture() exit(1); } - struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f); + struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid); if (!devh) { fprintf(stderr, "Error finding USB device\n"); exit(1); @@ -452,8 +665,8 @@ void start_bm_capture() printf(" interface %d\n", interface_number); const libusb_interface *interface = &config->interface[interface_number]; for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) { - printf(" alternate setting %d\n", altsetting); const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting]; + printf(" alternate setting %d\n", interface_desc->bAlternateSetting); for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) { const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number]; printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress); @@ -476,6 +689,9 @@ void start_bm_capture() // Alternate setting 1 is output, alternate setting 2 is input. // Card is reset when switching alternates, so the driver uses // this “double switch” when it wants to reset. + // + // There's also alternate settings 3 and 4, which seem to be + // like 1 and 2 except they advertise less bandwidth needed. rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); if (rc < 0) { fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); @@ -523,6 +739,8 @@ void start_bm_capture() // // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal? // + // Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them). + // // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels). // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a). // @@ -532,6 +750,11 @@ void start_bm_capture() // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio), // but the driver sets it to 0x8036802a at some point. // + // all of this is on request 214/215. other requests (192, 219, + // 222, 223, 224) are used for firmware upgrade. Probably best to + // stay out of it unless you know what you're doing. + // + // // register 16: // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal // @@ -551,59 +774,6 @@ void start_bm_capture() static const ctrl ctrls[] = { { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 4, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 20, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 28, 0 }, - { LIBUSB_ENDPOINT_IN, 215, 32, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 36, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 216, 44, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 48, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 52, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, // packet 354 - { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, - { LIBUSB_ENDPOINT_IN, 214, 40, 0 }, - // more... - //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, // wow, some kind of mode // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit // capture (v210). @@ -613,49 +783,13 @@ void start_bm_capture() // 0x3c000000 = composite video? (analog audio) // 0x3e000000 = s-video? (analog audio) { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, - - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff }, - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff }, - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 }, - //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a }, { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start? - //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 }, // latch for frame start? { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, // - //{ LIBUSB_ENDPOINT_OUT, 215, 4, 0x00000000 }, // appears to have no e fect - //{ LIBUSB_ENDPOINT_OUT, 215, 8, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 }, // appears to have no effect - //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 }, // appears to have no effect -#if 0 - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 28 }, - { LIBUSB_ENDPOINT_OUT, 215, 32 }, - { LIBUSB_ENDPOINT_OUT, 215, 36 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 0 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, - { LIBUSB_ENDPOINT_OUT, 215, 24 }, -#endif }; - for (int req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) { + for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) { uint32_t flipped = htonl(ctrls[req].data); static uint8_t value[4]; memcpy(value, &flipped, sizeof(flipped)); @@ -705,6 +839,7 @@ void start_bm_capture() LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, /*index=*/44, /*length=*/4); libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0); + xfr->user_data = this; libusb_submit_transfer(xfr); // set up an asynchronous transfer of register 24 @@ -716,6 +851,7 @@ void start_bm_capture() LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, /*index=*/24, /*length=*/4); libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0); + xfr->user_data = this; libusb_submit_transfer(xfr); #endif @@ -728,6 +864,7 @@ void start_bm_capture() LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, /*index=*/current_register, /*length=*/4); libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0); + xfr->user_data = this; //libusb_submit_transfer(xfr); audiofp = fopen("audio.raw", "wb"); @@ -749,7 +886,7 @@ void start_bm_capture() size &= ~1023; size += 1024; } - num_iso_pack = (2 << 20) / size; // 2 MB. + num_iso_pack = (2 << 18) / size; // 512 kB. printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size); } else { size = 0xc0; @@ -768,26 +905,28 @@ void start_bm_capture() libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes, num_iso_pack, cb_xfr, nullptr, 0); libusb_set_iso_packet_lengths(xfr, size); + xfr->user_data = this; iso_xfrs.push_back(xfr); } } +} - { - int i = 0; - for (libusb_transfer *xfr : iso_xfrs) { - rc = libusb_submit_transfer(xfr); - ++i; - if (rc < 0) { - //printf("num_bytes=%d\n", num_bytes); - fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", - xfr->endpoint, i, libusb_error_name(rc)); - exit(1); - } +void BMUSBCapture::start_bm_capture() +{ + printf("starting capture\n"); + int i = 0; + for (libusb_transfer *xfr : iso_xfrs) { + printf("submitting transfer...\n"); + int rc = libusb_submit_transfer(xfr); + ++i; + if (rc < 0) { + //printf("num_bytes=%d\n", num_bytes); + fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", + xfr->endpoint, i, libusb_error_name(rc)); + exit(1); } } - thread(usb_thread).detach(); - #if 0 libusb_release_interface(devh, 0); @@ -798,3 +937,22 @@ out: return rc; #endif } + +void BMUSBCapture::stop_dequeue_thread() +{ + dequeue_thread_should_quit = true; + queues_not_empty.notify_all(); + dequeue_thread.join(); +} + +void BMUSBCapture::start_bm_thread() +{ + should_quit = false; + usb_thread = thread(&BMUSBCapture::usb_thread_func); +} + +void BMUSBCapture::stop_bm_thread() +{ + should_quit = true; + usb_thread.join(); +}