git.sesse.net Git - bmusb/blobdiff - bmusb.cpp
Release 0.7.8.
[bmusb] / bmusb.cpp
index 05bb2a22f76c03da29b9cecb1b3f57361260ccb0..19a9da1aa2c52ca1ed22c980b3c244db405970e6 100644 (file)
--- a/bmusb.cpp
+++ b/bmusb.cpp
@@ -1,4 +1,4 @@
-// Intensity Shuttle USB3 capture driver, v0.7.2
+// Intensity Shuttle USB3 capture driver, v0.7.8
 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
 // (can do captures for hours at a time with no drops), except during startup
 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
@@ -202,6 +202,7 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma
                { 0x0151,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50.
                { 0x0011,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50 (5:4).
                { 0x0143, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
+               { 0x0161, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
                { 0x0103, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
                { 0x0125, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
                { 0x0121, 1280,  720,   0, 25,  5, 60000, 1001, false },  // 720p59.94.
@@ -341,7 +342,9 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
 
 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
 {
-       fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+       if (audiofp != nullptr) {
+               fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+       }
 }
 
 void BMUSBCapture::dequeue_thread_func()
@@ -535,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n
                }
                //dump_frame();
        } else {
+               if (current_frame->data_copy != nullptr) {
+                       memcpy(current_frame->data_copy + current_frame->len, start, bytes);
+               }
                if (current_frame->interleaved) {
                        uint8_t *data = current_frame->data + current_frame->len / 2;
                        uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
@@ -684,6 +690,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 {
        const __m256i needle = _mm256_set1_epi8(sync_char);
 
+       size_t bytes_copied;
        const __restrict __m256i *in = (const __m256i *)aligned_start;
        if (current_frame->interleaved) {
                __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
@@ -724,9 +731,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
                        ++out1;
                        ++out2;
                }
-               current_frame->len += (uint8_t *)in - aligned_start;
+               bytes_copied = (uint8_t *)in - aligned_start;
        } else {
-               __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
+               uint8_t *old_end = current_frame->data + current_frame->len;
+               __m256i *out = (__m256i *)old_end;
                while (in < (const __m256i *)limit) {
                        __m256i data = _mm256_load_si256(in);
                        _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
@@ -738,8 +746,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
                        ++in;
                        ++out;
                }
-               current_frame->len = (uint8_t *)out - current_frame->data;
+               bytes_copied = (uint8_t *)out - old_end;
        }
+       if (current_frame->data_copy != nullptr) {
+               // TODO: It would be somewhat more cache-efficient to write this in the
+               // same loop as above. However, it might not be worth the extra complexity.
+               memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
+       }
+       current_frame->len += bytes_copied;
 
        //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
        return (const uint8_t *)in;
@@ -751,6 +765,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
        const __m128i needle = _mm_set1_epi8(sync_char);
 
        const __m128i *in = (const __m128i *)aligned_start;
+       size_t bytes_copied;
        if (current_frame->interleaved) {
                __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
                __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
@@ -781,9 +796,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
                        ++out1;
                        ++out2;
                }
-               current_frame->len += (uint8_t *)in - aligned_start;
+               bytes_copied = (uint8_t *)in - aligned_start;
        } else {
-               __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
+               uint8_t *old_end = current_frame->data + current_frame->len;
+               __m128i *out = (__m128i *)old_end;
                while (in < (const __m128i *)limit) {
                        __m128i data = _mm_load_si128(in);
                        _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
@@ -795,8 +811,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
                        ++in;
                        ++out;
                }
-               current_frame->len = (uint8_t *)out - current_frame->data;
+               bytes_copied = (uint8_t *)out - old_end;
+       }
+       if (current_frame->data_copy != nullptr) {
+               // TODO: It would be somewhat more cache-efficient to write this in the
+               // same loop as above. However, it might not be worth the extra complexity.
+               memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
        }
+       current_frame->len += bytes_copied;
 
        //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
        return (const uint8_t *)in;