// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
// Audio comes out as 8-channel 24-bit raw audio.
+#include <assert.h>
+#include <errno.h>
+#include <libusb.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <libusb.h>
-#include <arpa/inet.h>
-#include <unistd.h>
#include <string.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <assert.h>
-#ifdef __SSE2__
+#ifdef __SSE4_1__
#include <immintrin.h>
#endif
+#include "bmusb.h"
+
#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
#include <functional>
#include <memory>
-#include <deque>
-#include <utility>
#include <mutex>
-#include <condition_variable>
-#include <thread>
#include <stack>
-#include <atomic>
-#include "bmusb.h"
+#include <thread>
using namespace std;
using namespace std::placeholders;
FILE *audiofp;
+thread usb_thread;
+atomic<bool> should_quit;
+
FrameAllocator::~FrameAllocator() {}
#define NUM_QUEUED_FRAMES 8
}
}
-#ifdef __SSE2__
+#ifdef __SSE4_1__
+
+#if 0
+void avx2_dump(const char *name, __m256i n)
+{
+ printf("%-10s:", name);
+ printf(" %02x", _mm256_extract_epi8(n, 0));
+ printf(" %02x", _mm256_extract_epi8(n, 1));
+ printf(" %02x", _mm256_extract_epi8(n, 2));
+ printf(" %02x", _mm256_extract_epi8(n, 3));
+ printf(" %02x", _mm256_extract_epi8(n, 4));
+ printf(" %02x", _mm256_extract_epi8(n, 5));
+ printf(" %02x", _mm256_extract_epi8(n, 6));
+ printf(" %02x", _mm256_extract_epi8(n, 7));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 8));
+ printf(" %02x", _mm256_extract_epi8(n, 9));
+ printf(" %02x", _mm256_extract_epi8(n, 10));
+ printf(" %02x", _mm256_extract_epi8(n, 11));
+ printf(" %02x", _mm256_extract_epi8(n, 12));
+ printf(" %02x", _mm256_extract_epi8(n, 13));
+ printf(" %02x", _mm256_extract_epi8(n, 14));
+ printf(" %02x", _mm256_extract_epi8(n, 15));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 16));
+ printf(" %02x", _mm256_extract_epi8(n, 17));
+ printf(" %02x", _mm256_extract_epi8(n, 18));
+ printf(" %02x", _mm256_extract_epi8(n, 19));
+ printf(" %02x", _mm256_extract_epi8(n, 20));
+ printf(" %02x", _mm256_extract_epi8(n, 21));
+ printf(" %02x", _mm256_extract_epi8(n, 22));
+ printf(" %02x", _mm256_extract_epi8(n, 23));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 24));
+ printf(" %02x", _mm256_extract_epi8(n, 25));
+ printf(" %02x", _mm256_extract_epi8(n, 26));
+ printf(" %02x", _mm256_extract_epi8(n, 27));
+ printf(" %02x", _mm256_extract_epi8(n, 28));
+ printf(" %02x", _mm256_extract_epi8(n, 29));
+ printf(" %02x", _mm256_extract_epi8(n, 30));
+ printf(" %02x", _mm256_extract_epi8(n, 31));
+ printf("\n");
+}
+#endif
// Does a memcpy and memchr in one to reduce processing time.
// Note that the benefit is somewhat limited if your L3 cache is small,
#if __AVX2__
const __m256i needle = _mm256_set1_epi8(sync_char);
- const __m256i *in = (const __m256i *)aligned_start;
+ const __restrict __m256i *in = (const __m256i *)aligned_start;
if (current_frame->interleaved) {
- __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
- __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
+ __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
+ __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
if (current_frame->len % 2 == 1) {
swap(out1, out2);
}
- __m256i mask_lower_byte = _mm256_set1_epi16(0x00ff);
+ __m256i shuffle_cw = _mm256_set_epi8(
+ 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
+ 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
while (in < (const __m256i *)limit) {
- __m256i data1 = _mm256_load_si256(in);
- __m256i data2 = _mm256_load_si256(in + 1);
- __m256i data1_lo = _mm256_and_si256(data1, mask_lower_byte);
- __m256i data2_lo = _mm256_and_si256(data2, mask_lower_byte);
- __m256i data1_hi = _mm256_srli_epi16(data1, 8);
- __m256i data2_hi = _mm256_srli_epi16(data2, 8);
- __m256i lo = _mm256_packus_epi16(data1_lo, data2_lo);
- lo = _mm256_permute4x64_epi64(lo, 0b11011000);
- _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
- __m256i hi = _mm256_packus_epi16(data1_hi, data2_hi);
- hi = _mm256_permute4x64_epi64(hi, 0b11011000);
- _mm256_storeu_si256(out2, hi);
+ // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
+ __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh
+ __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp
+
__m256i found1 = _mm256_cmpeq_epi8(data1, needle);
__m256i found2 = _mm256_cmpeq_epi8(data2, needle);
- if (!_mm256_testz_si256(found1, found1) ||
- !_mm256_testz_si256(found2, found2)) {
+ __m256i found = _mm256_or_si256(found1, found2);
+
+ data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh
+ data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop
+
+ data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh
+ data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop
+
+ __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
+ __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
+
+ _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used.
+ _mm256_storeu_si256(out2, hi);
+
+ if (!_mm256_testz_si256(found, found)) {
break;
}
const uint8_t *start = xfr->buffer + offset;
const uint8_t *limit = start + pack->actual_length;
while (start < limit) { // Usually runs only one iteration.
-#ifdef __SSE2__
+#ifdef __SSE4_1__
start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
if (start == limit) break;
assert(start < limit);
void BMUSBCapture::usb_thread_func()
{
- printf("usb thread started\n");
-
sched_param param;
memset(¶m, 0, sizeof(param));
param.sched_priority = 1;
}
}
-void BMUSBCapture::start_bm_capture()
+void BMUSBCapture::configure_card()
{
if (video_frame_allocator == nullptr) {
set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
int rc;
struct libusb_transfer *xfr;
- vector<libusb_transfer *> iso_xfrs;
rc = libusb_init(nullptr);
if (rc < 0) {
exit(1);
}
- struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
+ //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
+ //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
+ struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
if (!devh) {
fprintf(stderr, "Error finding USB device\n");
exit(1);
static const ctrl ctrls[] = {
{ LIBUSB_ENDPOINT_IN, 214, 16, 0 },
{ LIBUSB_ENDPOINT_IN, 214, 0, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 0, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 4, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 16, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 20, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 24, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 28, 0 },
- { LIBUSB_ENDPOINT_IN, 215, 32, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 36, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 216, 44, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 48, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 52, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 24, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 24, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 }, // packet 354
- { LIBUSB_ENDPOINT_IN, 214, 24, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 12, 0 },
- { LIBUSB_ENDPOINT_IN, 214, 40, 0 },
- // more...
- //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
- //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, // wow, some kind of mode
// seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
// capture (v210).
// 0x3c000000 = composite video? (analog audio)
// 0x3e000000 = s-video? (analog audio)
{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 },
+ //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 },
//{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 },
-
- //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff },
- //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff },
- //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 },
- //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 },
- //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a },
{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start?
- //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 }, // latch for frame start?
{ LIBUSB_ENDPOINT_IN, 214, 24, 0 }, //
- //{ LIBUSB_ENDPOINT_OUT, 215, 4, 0x00000000 }, // appears to have no e fect
- //{ LIBUSB_ENDPOINT_OUT, 215, 8, 0x00000000 }, // appears to have no effect
- //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 }, // appears to have no effect
- //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 }, // appears to have no effect
- //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 }, // appears to have no effect
- //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 }, // appears to have no effect
-#if 0
- { LIBUSB_ENDPOINT_OUT, 215, 0 },
- { LIBUSB_ENDPOINT_OUT, 215, 0 },
- { LIBUSB_ENDPOINT_OUT, 215, 28 },
- { LIBUSB_ENDPOINT_OUT, 215, 32 },
- { LIBUSB_ENDPOINT_OUT, 215, 36 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 0 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
- { LIBUSB_ENDPOINT_OUT, 215, 24 },
-#endif
};
for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
size &= ~1023;
size += 1024;
}
- num_iso_pack = (2 << 20) / size; // 2 MB.
+ num_iso_pack = (2 << 18) / size; // 512 kB.
printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
} else {
size = 0xc0;
iso_xfrs.push_back(xfr);
}
}
+}
- {
- int i = 0;
- for (libusb_transfer *xfr : iso_xfrs) {
- rc = libusb_submit_transfer(xfr);
- ++i;
- if (rc < 0) {
- //printf("num_bytes=%d\n", num_bytes);
- fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
- xfr->endpoint, i, libusb_error_name(rc));
- exit(1);
- }
+void BMUSBCapture::start_bm_capture()
+{
+ printf("starting capture\n");
+ int i = 0;
+ for (libusb_transfer *xfr : iso_xfrs) {
+ printf("submitting transfer...\n");
+ int rc = libusb_submit_transfer(xfr);
+ ++i;
+ if (rc < 0) {
+ //printf("num_bytes=%d\n", num_bytes);
+ fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
+ xfr->endpoint, i, libusb_error_name(rc));
+ exit(1);
}
}
- should_quit = false;
- usb_thread = thread(&BMUSBCapture::usb_thread_func, this);
-
#if 0
libusb_release_interface(devh, 0);
#endif
}
-void BMUSBCapture::stop_bm_capture()
+void BMUSBCapture::start_bm_thread()
+{
+ should_quit = false;
+ usb_thread = thread(&BMUSBCapture::usb_thread_func);
+}
+
+void BMUSBCapture::stop_bm_thread()
{
should_quit = true;
usb_thread.join();