git.sesse.net Git - nageru/blobdiff - decklink_capture.cpp
Save another ~1% on the audio benchmark, by dealing with the std::deque more efficiently.
[nageru] / decklink_capture.cpp
index df8b668f28c26b4552bb3fa0b07e297934999ed4..81566cb1a5ad20d85a3be25a8b01e2c24a62906e 100644 (file)
@@ -20,6 +20,7 @@
 
 using namespace std;
 using namespace std::placeholders;
+using namespace bmusb;
 
 namespace {
 
@@ -58,7 +59,7 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t
                memcpy_interleaved(dest1, dest2, src, n2);
                dest1 += n2 / 2;
                dest2 += n2 / 2;
-               if (n2 % 1) {
+               if (n2 % 2) {
                        swap(dest1, dest2);
                }
                src = aligned_src;
@@ -72,9 +73,9 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t
        assert(((limit - src) % 64) == 0);
 
 #if __AVX2__
-       const __restrict __m256i *in = (const __m256i *)src;
-       __restrict __m256i *out1 = (__m256i *)dest1;
-       __restrict __m256i *out2 = (__m256i *)dest2;
+       const __m256i * __restrict in = (const __m256i *)src;
+       __m256i * __restrict out1 = (__m256i *)dest1;
+       __m256i * __restrict out2 = (__m256i *)dest2;
 
        __m256i shuffle_cw = _mm256_set_epi8(
                15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
@@ -102,9 +103,9 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t
                consumed += 64;
        }
 #else
-       const __restrict __m128i *in = (const __m128i *)src;
-       __restrict __m128i *out1 = (__m128i *)dest1;
-       __restrict __m128i *out2 = (__m128i *)dest2;
+       const __m128i * __restrict in = (const __m128i *)src;
+       __m128i * __restrict out1 = (__m128i *)dest1;
+       __m128i * __restrict out2 = (__m128i *)dest2;
 
        __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
        while (in < (const __m128i *)limit) {
@@ -134,15 +135,15 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t
 }  // namespace
 
 DeckLinkCapture::DeckLinkCapture(IDeckLink *card, int card_index)
-       : card_index(card_index)
+       : card_index(card_index), card(card)
 {
        {
                const char *model_name;
                char buf[256];
                if (card->GetModelName(&model_name) == S_OK) {
-                       snprintf(buf, sizeof(buf), "Card %d: %s", card_index, model_name);
+                       snprintf(buf, sizeof(buf), "PCI card %d: %s", card_index, model_name);
                } else {
-                       snprintf(buf, sizeof(buf), "Card %d: Unknown DeckLink card", card_index);
+                       snprintf(buf, sizeof(buf), "PCI card %d: Unknown DeckLink card", card_index);
                }
                description = buf;
        }
@@ -262,6 +263,9 @@ DeckLinkCapture::~DeckLinkCapture()
        if (has_dequeue_callbacks) {
                dequeue_cleanup_callback();
        }
+       input->Release();
+       config->Release();
+       card->Release();
 }
 
 HRESULT STDMETHODCALLTYPE DeckLinkCapture::QueryInterface(REFIID, LPVOID *)
@@ -381,10 +385,12 @@ HRESULT STDMETHODCALLTYPE DeckLinkCapture::VideoInputFrameArrived(
 void DeckLinkCapture::configure_card()
 {
        if (video_frame_allocator == nullptr) {
-               set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));  // FIXME: leak.
+               owned_video_frame_allocator.reset(new MallocFrameAllocator(FRAME_SIZE, NUM_QUEUED_VIDEO_FRAMES));
+               set_video_frame_allocator(owned_video_frame_allocator.get());
        }
        if (audio_frame_allocator == nullptr) {
-               set_audio_frame_allocator(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));  // FIXME: leak.
+               owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
+               set_audio_frame_allocator(owned_audio_frame_allocator.get());
        }
 }