git.sesse.net Git - bmusb/blobdiff - bmusb.cpp
Fix wraparound in some timecode comparisons.
[bmusb] / bmusb.cpp
index 00f06e443f387f1b0218ae8284a53b00a7b173da..387e042900ea82bc10d3f7775e1915c7f8113c1e 100644 (file)
--- a/bmusb.cpp
+++ b/bmusb.cpp
@@ -4,29 +4,31 @@
 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
 // Audio comes out as 8-channel 24-bit raw audio.
 
+#include <assert.h>
+#include <errno.h>
+#include <libusb.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <libusb.h>
-#include <arpa/inet.h>
-#include <unistd.h>
 #include <string.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <assert.h>
-#ifdef __SSE2__
+#ifdef __SSE4_1__
 #include <immintrin.h>
 #endif
+#include "bmusb.h"
+
 #include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
 #include <functional>
 #include <memory>
-#include <deque>
-#include <utility>
 #include <mutex>
-#include <condition_variable>
-#include <thread>
 #include <stack>
-#include <atomic>
-#include "bmusb.h"
+#include <thread>
 
 using namespace std;
 using namespace std::placeholders;
@@ -45,9 +47,12 @@ using namespace std::placeholders;
 
 FILE *audiofp;
 
+thread usb_thread;
+atomic<bool> should_quit;
+
 FrameAllocator::~FrameAllocator() {}
 
-#define NUM_QUEUED_FRAMES 8
+#define NUM_QUEUED_FRAMES 16
 class MallocFrameAllocator : public FrameAllocator {
 public:
        MallocFrameAllocator(size_t frame_size);
@@ -88,6 +93,9 @@ FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
 
 void MallocFrameAllocator::release_frame(Frame frame)
 {
+       if (frame.overflow > 0) {
+               printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
+       }
        unique_lock<mutex> lock(freelist_mutex);
        freelist.push(unique_ptr<uint8_t[]>(frame.data));
 }
@@ -139,24 +147,31 @@ void dump_audio_block(uint8_t *audio_start, size_t audio_len)
        fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
 }
 
-void BMUSBCapture::dequeue_thread()
+void BMUSBCapture::dequeue_thread_func()
 {
-       for ( ;; ) {
+       if (has_dequeue_callbacks) {
+               dequeue_init_callback();
+       }
+       while (!dequeue_thread_should_quit) {
                unique_lock<mutex> lock(queue_lock);
-               queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
+               queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
 
                uint16_t video_timecode = pending_video_frames.front().timecode;
                uint16_t audio_timecode = pending_audio_frames.front().timecode;
-               if (video_timecode < audio_timecode) {
+               if (uint16_less_than_with_wraparound(video_timecode, audio_timecode)) {
                        printf("Video block 0x%04x without corresponding audio block, dropping.\n",
                                video_timecode);
                        video_frame_allocator->release_frame(pending_video_frames.front().frame);
                        pending_video_frames.pop_front();
-               } else if (audio_timecode < video_timecode) {
-                       printf("Audio block 0x%04x without corresponding video block, dropping.\n",
+               } else if (uint16_less_than_with_wraparound(audio_timecode, video_timecode)) {
+                       printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
                                audio_timecode);
-                       audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
+                       QueuedFrame audio_frame = pending_audio_frames.front();
                        pending_audio_frames.pop_front();
+                       lock.unlock();
+                       frame_callback(audio_timecode,
+                                      FrameAllocator::Frame(), 0, 0x0000,
+                                      audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
                } else {
                        QueuedFrame video_frame = pending_video_frames.front();
                        QueuedFrame audio_frame = pending_audio_frames.front();
@@ -176,6 +191,9 @@ void BMUSBCapture::dequeue_thread()
                                       audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
                }
        }
+       if (has_dequeue_callbacks) {
+               dequeue_cleanup_callback();
+       }
 }
 
 void BMUSBCapture::start_new_frame(const uint8_t *start)
@@ -184,6 +202,21 @@ void BMUSBCapture::start_new_frame(const uint8_t *start)
        uint16_t timecode = (start[1] << 8) | start[0];
 
        if (current_video_frame.len > 0) {
+               // If format is 0x0800 (no signal), add a fake (empty) audio
+               // frame to get it out of the queue.
+               // TODO: Figure out if there are other formats that come with
+               // no audio, and treat them the same.
+               if (format == 0x0800) {
+                       FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
+                       if (fake_audio_frame.data == nullptr) {
+                               // Oh well, it's just a no-signal frame anyway.
+                               printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
+                               current_video_frame.owner->release_frame(current_video_frame);
+                               current_video_frame = video_frame_allocator->alloc_frame();
+                               return;
+                       }
+                       queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
+               }
                //dump_frame();
                queue_frame(format, timecode, current_video_frame, &pending_video_frames);
        }
@@ -252,8 +285,13 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n
 
        int bytes = end - start;
        if (current_frame->len + bytes > current_frame->size) {
-               printf("%d bytes overflow after last %s frame\n",
-                       int(current_frame->len + bytes - current_frame->size), frame_type_name);
+               current_frame->overflow = current_frame->len + bytes - current_frame->size;
+               current_frame->len = current_frame->size;
+               if (current_frame->overflow > 1048576) {
+                       printf("%d bytes overflow after last %s frame\n",
+                               int(current_frame->overflow), frame_type_name);
+                       current_frame->overflow = 0;
+               }
                //dump_frame();
        } else {
                if (current_frame->interleaved) {
@@ -278,7 +316,50 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n
        }
 }
 
-#ifdef __SSE2__
+#ifdef __SSE4_1__
+
+#if 0
+void avx2_dump(const char *name, __m256i n)
+{
+       printf("%-10s:", name);
+       printf(" %02x", _mm256_extract_epi8(n, 0));
+       printf(" %02x", _mm256_extract_epi8(n, 1));
+       printf(" %02x", _mm256_extract_epi8(n, 2));
+       printf(" %02x", _mm256_extract_epi8(n, 3));
+       printf(" %02x", _mm256_extract_epi8(n, 4));
+       printf(" %02x", _mm256_extract_epi8(n, 5));
+       printf(" %02x", _mm256_extract_epi8(n, 6));
+       printf(" %02x", _mm256_extract_epi8(n, 7));
+       printf(" ");
+       printf(" %02x", _mm256_extract_epi8(n, 8));
+       printf(" %02x", _mm256_extract_epi8(n, 9));
+       printf(" %02x", _mm256_extract_epi8(n, 10));
+       printf(" %02x", _mm256_extract_epi8(n, 11));
+       printf(" %02x", _mm256_extract_epi8(n, 12));
+       printf(" %02x", _mm256_extract_epi8(n, 13));
+       printf(" %02x", _mm256_extract_epi8(n, 14));
+       printf(" %02x", _mm256_extract_epi8(n, 15));
+       printf(" ");
+       printf(" %02x", _mm256_extract_epi8(n, 16));
+       printf(" %02x", _mm256_extract_epi8(n, 17));
+       printf(" %02x", _mm256_extract_epi8(n, 18));
+       printf(" %02x", _mm256_extract_epi8(n, 19));
+       printf(" %02x", _mm256_extract_epi8(n, 20));
+       printf(" %02x", _mm256_extract_epi8(n, 21));
+       printf(" %02x", _mm256_extract_epi8(n, 22));
+       printf(" %02x", _mm256_extract_epi8(n, 23));
+       printf(" ");
+       printf(" %02x", _mm256_extract_epi8(n, 24));
+       printf(" %02x", _mm256_extract_epi8(n, 25));
+       printf(" %02x", _mm256_extract_epi8(n, 26));
+       printf(" %02x", _mm256_extract_epi8(n, 27));
+       printf(" %02x", _mm256_extract_epi8(n, 28));
+       printf(" %02x", _mm256_extract_epi8(n, 29));
+       printf(" %02x", _mm256_extract_epi8(n, 30));
+       printf(" %02x", _mm256_extract_epi8(n, 31));
+       printf("\n");
+}
+#endif
 
 // Does a memcpy and memchr in one to reduce processing time.
 // Note that the benefit is somewhat limited if your L3 cache is small,
@@ -469,7 +550,7 @@ void decode_packs(const libusb_transfer *xfr,
                const uint8_t *start = xfr->buffer + offset;
                const uint8_t *limit = start + pack->actual_length;
                while (start < limit) {  // Usually runs only one iteration.
-#ifdef __SSE2__
+#ifdef __SSE4_1__
                        start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
                        if (start == limit) break;
                        assert(start < limit);
@@ -559,8 +640,6 @@ void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
 
 void BMUSBCapture::usb_thread_func()
 {
-       printf("usb thread started\n");
-
        sched_param param;
        memset(&param, 0, sizeof(param));
        param.sched_priority = 1;
@@ -574,7 +653,7 @@ void BMUSBCapture::usb_thread_func()
        }
 }
 
-void BMUSBCapture::start_bm_capture()
+void BMUSBCapture::configure_card()
 {
        if (video_frame_allocator == nullptr) {
                set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
@@ -582,11 +661,11 @@ void BMUSBCapture::start_bm_capture()
        if (audio_frame_allocator == nullptr) {
                set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
        }
-       thread(&BMUSBCapture::dequeue_thread, this).detach();
+       dequeue_thread_should_quit = false;
+       dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
 
        int rc;
        struct libusb_transfer *xfr;
-       vector<libusb_transfer *> iso_xfrs;
 
        rc = libusb_init(nullptr);
        if (rc < 0) {
@@ -594,7 +673,9 @@ void BMUSBCapture::start_bm_capture()
                exit(1);
        }
 
-       struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
+       //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
+       //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
+       struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
        if (!devh) {
                fprintf(stderr, "Error finding USB device\n");
                exit(1);
@@ -635,6 +716,9 @@ void BMUSBCapture::start_bm_capture()
        // Alternate setting 1 is output, alternate setting 2 is input.
        // Card is reset when switching alternates, so the driver uses
        // this “double switch” when it wants to reset.
+       //
+       // There are also alternate settings 3 and 4, which seem to be
+       // like 1 and 2 except they advertise less bandwidth needed.
        rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
        if (rc < 0) {
                fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
@@ -829,7 +913,7 @@ void BMUSBCapture::start_bm_capture()
                                        size &= ~1023;
                                        size += 1024;
                                }
-                               num_iso_pack = (2 << 20) / size;  // 2 MB.
+                               num_iso_pack = (2 << 18) / size;  // 512 kB.
                                printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
                        } else {
                                size = 0xc0;
@@ -852,24 +936,24 @@ void BMUSBCapture::start_bm_capture()
                        iso_xfrs.push_back(xfr);
                }
        }
+}
 
-       {
-               int i = 0;
-               for (libusb_transfer *xfr : iso_xfrs) {
-                       rc = libusb_submit_transfer(xfr);
-                       ++i;
-                       if (rc < 0) {
-                               //printf("num_bytes=%d\n", num_bytes);
-                               fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
-                                       xfr->endpoint, i, libusb_error_name(rc));
-                               exit(1);
-                       }
+void BMUSBCapture::start_bm_capture()
+{
+       printf("starting capture\n");
+       int i = 0;
+       for (libusb_transfer *xfr : iso_xfrs) {
+               printf("submitting transfer...\n");
+               int rc = libusb_submit_transfer(xfr);
+               ++i;
+               if (rc < 0) {
+                       //printf("num_bytes=%d\n", num_bytes);
+                       fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
+                               xfr->endpoint, i, libusb_error_name(rc));
+                       exit(1);
                }
        }
 
-       should_quit = false;
-       usb_thread = thread(&BMUSBCapture::usb_thread_func, this);
-
 
 #if 0
        libusb_release_interface(devh, 0);
@@ -881,7 +965,20 @@ out:
 #endif
 }
 
-void BMUSBCapture::stop_bm_capture()
+void BMUSBCapture::stop_dequeue_thread()
+{
+       dequeue_thread_should_quit = true;
+       queues_not_empty.notify_all();
+       dequeue_thread.join();
+}
+
+void BMUSBCapture::start_bm_thread()
+{
+       should_quit = false;
+       usb_thread = thread(&BMUSBCapture::usb_thread_func);
+}
+
+void BMUSBCapture::stop_bm_thread()
 {
        should_quit = true;
        usb_thread.join();