// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
// Audio comes out as 8-channel 24-bit raw audio.
+#include <assert.h>
+#include <errno.h>
+#include <libusb.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <libusb.h>
-#include <arpa/inet.h>
-#include <unistd.h>
#include <string.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <assert.h>
-#ifdef __SSE2__
+#ifdef __SSE4_1__
#include <immintrin.h>
#endif
+#include "bmusb.h"
+
#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
#include <functional>
#include <memory>
-#include <deque>
-#include <utility>
#include <mutex>
-#include <condition_variable>
-#include <thread>
#include <stack>
-#include <atomic>
-#include "bmusb.h"
+#include <thread>
using namespace std;
using namespace std::placeholders;
FrameAllocator::~FrameAllocator() {}
-#define NUM_QUEUED_FRAMES 8
+// Audio is more important than video, and also much cheaper.
+// By having many more audio frames available, hopefully if something
+// starts to drop, we'll have CPU load go down (from not having to
+// process as much video) before we have to drop audio.
+#define NUM_QUEUED_VIDEO_FRAMES 16
+#define NUM_QUEUED_AUDIO_FRAMES 64
+
class MallocFrameAllocator : public FrameAllocator {
public:
- MallocFrameAllocator(size_t frame_size);
+ MallocFrameAllocator(size_t frame_size, size_t num_queued_frames);
Frame alloc_frame() override;
void release_frame(Frame frame) override;
stack<unique_ptr<uint8_t[]>> freelist; // All of size <frame_size>.
};
-MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
+MallocFrameAllocator::MallocFrameAllocator(size_t frame_size, size_t num_queued_frames)
: frame_size(frame_size)
{
- for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
+ for (size_t i = 0; i < num_queued_frames; ++i) {
freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
}
}
void MallocFrameAllocator::release_frame(Frame frame)
{
+ if (frame.overflow > 0) {
+ printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
+ }
unique_lock<mutex> lock(freelist_mutex);
freelist.push(unique_ptr<uint8_t[]>(frame.data));
}
void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
{
+ unique_lock<mutex> lock(queue_lock);
if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
q->back().timecode, timecode);
qf.format = format;
qf.timecode = timecode;
qf.frame = frame;
-
- {
- unique_lock<mutex> lock(queue_lock);
- q->push_back(move(qf));
- }
+ q->push_back(move(qf));
queues_not_empty.notify_one(); // might be spurious
}
fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
}
-void BMUSBCapture::dequeue_thread()
+void BMUSBCapture::dequeue_thread_func()
{
- for ( ;; ) {
+ if (has_dequeue_callbacks) {
+ dequeue_init_callback();
+ }
+ while (!dequeue_thread_should_quit) {
unique_lock<mutex> lock(queue_lock);
- queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
+ queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
+
+ if (dequeue_thread_should_quit) break;
uint16_t video_timecode = pending_video_frames.front().timecode;
uint16_t audio_timecode = pending_audio_frames.front().timecode;
- if (video_timecode < audio_timecode) {
+ if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
printf("Video block 0x%04x without corresponding audio block, dropping.\n",
video_timecode);
- video_frame_allocator->release_frame(pending_video_frames.front().frame);
+ QueuedFrame video_frame = pending_video_frames.front();
pending_video_frames.pop_front();
- } else if (audio_timecode < video_timecode) {
- printf("Audio block 0x%04x without corresponding video block, dropping.\n",
+ lock.unlock();
+ video_frame_allocator->release_frame(video_frame.frame);
+ } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
+ printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
audio_timecode);
- audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
+ QueuedFrame audio_frame = pending_audio_frames.front();
pending_audio_frames.pop_front();
+ lock.unlock();
+ frame_callback(audio_timecode,
+ FrameAllocator::Frame(), 0, 0x0000,
+ audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
} else {
QueuedFrame video_frame = pending_video_frames.front();
QueuedFrame audio_frame = pending_audio_frames.front();
audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
}
}
+ if (has_dequeue_callbacks) {
+ dequeue_cleanup_callback();
+ }
}
void BMUSBCapture::start_new_frame(const uint8_t *start)
uint16_t timecode = (start[1] << 8) | start[0];
if (current_video_frame.len > 0) {
+ // If format is 0x0800 (no signal), add a fake (empty) audio
+ // frame to get it out of the queue.
+ // TODO: Figure out if there are other formats that come with
+ // no audio, and treat them the same.
+ if (format == 0x0800) {
+ FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
+ if (fake_audio_frame.data == nullptr) {
+ // Oh well, it's just a no-signal frame anyway.
+ printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
+ current_video_frame.owner->release_frame(current_video_frame);
+ current_video_frame = video_frame_allocator->alloc_frame();
+ return;
+ }
+ queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
+ }
//dump_frame();
queue_frame(format, timecode, current_video_frame, &pending_video_frames);
}
int bytes = end - start;
if (current_frame->len + bytes > current_frame->size) {
- printf("%d bytes overflow after last %s frame\n",
- int(current_frame->len + bytes - current_frame->size), frame_type_name);
+ current_frame->overflow = current_frame->len + bytes - current_frame->size;
+ current_frame->len = current_frame->size;
+ if (current_frame->overflow > 1048576) {
+ printf("%d bytes overflow after last %s frame\n",
+ int(current_frame->overflow), frame_type_name);
+ current_frame->overflow = 0;
+ }
//dump_frame();
} else {
if (current_frame->interleaved) {
}
}
-#ifdef __SSE2__
+#ifdef __SSE4_1__
+
+#if 0
+void avx2_dump(const char *name, __m256i n)
+{
+ printf("%-10s:", name);
+ printf(" %02x", _mm256_extract_epi8(n, 0));
+ printf(" %02x", _mm256_extract_epi8(n, 1));
+ printf(" %02x", _mm256_extract_epi8(n, 2));
+ printf(" %02x", _mm256_extract_epi8(n, 3));
+ printf(" %02x", _mm256_extract_epi8(n, 4));
+ printf(" %02x", _mm256_extract_epi8(n, 5));
+ printf(" %02x", _mm256_extract_epi8(n, 6));
+ printf(" %02x", _mm256_extract_epi8(n, 7));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 8));
+ printf(" %02x", _mm256_extract_epi8(n, 9));
+ printf(" %02x", _mm256_extract_epi8(n, 10));
+ printf(" %02x", _mm256_extract_epi8(n, 11));
+ printf(" %02x", _mm256_extract_epi8(n, 12));
+ printf(" %02x", _mm256_extract_epi8(n, 13));
+ printf(" %02x", _mm256_extract_epi8(n, 14));
+ printf(" %02x", _mm256_extract_epi8(n, 15));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 16));
+ printf(" %02x", _mm256_extract_epi8(n, 17));
+ printf(" %02x", _mm256_extract_epi8(n, 18));
+ printf(" %02x", _mm256_extract_epi8(n, 19));
+ printf(" %02x", _mm256_extract_epi8(n, 20));
+ printf(" %02x", _mm256_extract_epi8(n, 21));
+ printf(" %02x", _mm256_extract_epi8(n, 22));
+ printf(" %02x", _mm256_extract_epi8(n, 23));
+ printf(" ");
+ printf(" %02x", _mm256_extract_epi8(n, 24));
+ printf(" %02x", _mm256_extract_epi8(n, 25));
+ printf(" %02x", _mm256_extract_epi8(n, 26));
+ printf(" %02x", _mm256_extract_epi8(n, 27));
+ printf(" %02x", _mm256_extract_epi8(n, 28));
+ printf(" %02x", _mm256_extract_epi8(n, 29));
+ printf(" %02x", _mm256_extract_epi8(n, 30));
+ printf(" %02x", _mm256_extract_epi8(n, 31));
+ printf("\n");
+}
+#endif
// Does a memcpy and memchr in one to reduce processing time.
// Note that the benefit is somewhat limited if your L3 cache is small,
const uint8_t *start = xfr->buffer + offset;
const uint8_t *limit = start + pack->actual_length;
while (start < limit) { // Usually runs only one iteration.
-#ifdef __SSE2__
+#ifdef __SSE4_1__
start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
if (start == limit) break;
assert(start < limit);
}
#endif
- if (libusb_submit_transfer(xfr) < 0) {
- fprintf(stderr, "error re-submitting URB\n");
+ int rc = libusb_submit_transfer(xfr);
+ if (rc < 0) {
+ fprintf(stderr, "error re-submitting URB: %s\n", libusb_error_name(rc));
exit(1);
}
}
void BMUSBCapture::usb_thread_func()
{
- printf("usb thread started\n");
-
sched_param param;
memset(¶m, 0, sizeof(param));
param.sched_priority = 1;
}
}
+struct USBCardDevice {
+ uint16_t product;
+ uint8_t bus, port;
+ libusb_device *device;
+};
+
+libusb_device_handle *open_card(int card_index)
+{
+ libusb_device **devices;
+ ssize_t num_devices = libusb_get_device_list(nullptr, &devices);
+ if (num_devices == -1) {
+ fprintf(stderr, "Error finding USB devices\n");
+ exit(1);
+ }
+ vector<USBCardDevice> found_cards;
+ for (ssize_t i = 0; i < num_devices; ++i) {
+ libusb_device_descriptor desc;
+ if (libusb_get_device_descriptor(devices[i], &desc) < 0) {
+ fprintf(stderr, "Error getting device descriptor for device %d\n", int(i));
+ exit(1);
+ }
+
+ uint8_t bus = libusb_get_bus_number(devices[i]);
+ uint8_t port = libusb_get_port_number(devices[i]);
+
+ if (!(desc.idVendor == 0x1edb && desc.idProduct == 0xbd3b) &&
+ !(desc.idVendor == 0x1edb && desc.idProduct == 0xbd4f)) {
+ libusb_unref_device(devices[i]);
+ continue;
+ }
+
+ found_cards.push_back({ desc.idProduct, bus, port, devices[i] });
+ }
+ libusb_free_device_list(devices, 0);
+
+ // Sort the devices to get a consistent ordering.
+ sort(found_cards.begin(), found_cards.end(), [](const USBCardDevice &a, const USBCardDevice &b) {
+ if (a.product != b.product)
+ return a.product < b.product;
+ if (a.bus != b.bus)
+ return a.bus < b.bus;
+ return a.port < b.port;
+ });
+
+ for (size_t i = 0; i < found_cards.size(); ++i) {
+ fprintf(stderr, "Card %d: Bus %03u Device %03u ", int(i), found_cards[i].bus, found_cards[i].port);
+ if (found_cards[i].product == 0xbd3b) {
+ fprintf(stderr, "Intensity Shuttle\n");
+ } else if (found_cards[i].product == 0xbd4f) {
+ fprintf(stderr, "UltraStudio SDI\n");
+ } else {
+ assert(false);
+ }
+ }
+
+ if (size_t(card_index) >= found_cards.size()) {
+ fprintf(stderr, "Could not open card %d (only %d found)\n", card_index, int(found_cards.size()));
+ exit(1);
+ }
+
+ libusb_device_handle *devh;
+ int rc = libusb_open(found_cards[card_index].device, &devh);
+ if (rc < 0) {
+ fprintf(stderr, "Error opening card %d: %s\n", card_index, libusb_error_name(rc));
+ exit(1);
+ }
+
+ for (size_t i = 0; i < found_cards.size(); ++i) {
+ libusb_unref_device(found_cards[i].device);
+ }
+
+ return devh;
+}
+
void BMUSBCapture::configure_card()
{
if (video_frame_allocator == nullptr) {
- set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak.
+ set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES)); // FIXME: leak.
}
if (audio_frame_allocator == nullptr) {
- set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak.
+ set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES)); // FIXME: leak.
}
- thread(&BMUSBCapture::dequeue_thread, this).detach();
+ dequeue_thread_should_quit = false;
+ dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
int rc;
struct libusb_transfer *xfr;
exit(1);
}
- //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
- //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
- struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
+ libusb_device_handle *devh = open_card(card_index);
if (!devh) {
fprintf(stderr, "Error finding USB device\n");
exit(1);
// Alternate setting 1 is output, alternate setting 2 is input.
// Card is reset when switching alternates, so the driver uses
// this “double switch” when it wants to reset.
+ //
+ // There's also alternate settings 3 and 4, which seem to be
+ // like 1 and 2 except they advertise less bandwidth needed.
rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
if (rc < 0) {
fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
}
rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
if (rc < 0) {
- fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
+ fprintf(stderr, "Error setting alternate 2: %s\n", libusb_error_name(rc));
exit(1);
}
#if 0
// set up isochronous transfers for audio and video
for (int e = 3; e <= 4; ++e) {
//int num_transfers = (e == 3) ? 6 : 6;
- int num_transfers = 6;
+ int num_transfers = 10;
for (int i = 0; i < num_transfers; ++i) {
int num_iso_pack, size;
if (e == 3) {
size &= ~1023;
size += 1024;
}
- num_iso_pack = (2 << 18) / size; // 512 kB.
+ num_iso_pack = (2 << 16) / size; // 128 kB.
printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
} else {
size = 0xc0;
#endif
}
+void BMUSBCapture::stop_dequeue_thread()
+{
+ dequeue_thread_should_quit = true;
+ queues_not_empty.notify_all();
+ dequeue_thread.join();
+}
+
void BMUSBCapture::start_bm_thread()
{
should_quit = false;