X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=fake_capture.cpp;h=7d23d8cafdc70df47f123f06200b9730c5897742;hb=08a992d58c34da8d8bbd70226f7e85c9f30d9514;hp=53d0db896a958303e16300bd153d2ce2cfb3bb35;hpb=fed1b7ce38631cb850270be79e136f8343451971;p=nageru diff --git a/fake_capture.cpp b/fake_capture.cpp index 53d0db8..7d23d8c 100644 --- a/fake_capture.cpp +++ b/fake_capture.cpp @@ -10,6 +10,9 @@ #include #include #include +#if __SSE2__ +#include +#endif #include #include "bmusb/bmusb.h" @@ -26,13 +29,28 @@ constexpr uint8_t crs[NUM_COLORS] = { 240, 34, 110, 128 }; using namespace std; +namespace bmusb { namespace { -// TODO: SSE2-optimize (or at least write full int64s) if speed becomes a problem. - void memset2(uint8_t *s, const uint8_t c[2], size_t n) { - for (size_t i = 0; i < n; ++i) { + size_t i = 0; +#if __SSE2__ + const uint8_t c_expanded[16] = { + c[0], c[1], c[0], c[1], c[0], c[1], c[0], c[1], + c[0], c[1], c[0], c[1], c[0], c[1], c[0], c[1] + }; + __m128i cc = *(__m128i *)c_expanded; + __m128i *out = (__m128i *)s; + + for ( ; i < (n & ~15); i += 16) { + _mm_storeu_si128(out++, cc); + _mm_storeu_si128(out++, cc); + } + + s = (uint8_t *)out; +#endif + for ( ; i < n; ++i) { *s++ = c[0]; *s++ = c[1]; } @@ -40,7 +58,23 @@ void memset2(uint8_t *s, const uint8_t c[2], size_t n) void memset4(uint8_t *s, const uint8_t c[4], size_t n) { - for (size_t i = 0; i < n; ++i) { + size_t i = 0; +#if __SSE2__ + const uint8_t c_expanded[16] = { + c[0], c[1], c[2], c[3], c[0], c[1], c[2], c[3], + c[0], c[1], c[2], c[3], c[0], c[1], c[2], c[3] + }; + __m128i cc = *(__m128i *)c_expanded; + __m128i *out = (__m128i *)s; + + for ( ; i < (n & ~7); i += 8) { + _mm_storeu_si128(out++, cc); + _mm_storeu_si128(out++, cc); + } + + s = (uint8_t *)out; +#endif + for ( ; i < n; ++i) { *s++ = c[0]; *s++ = c[1]; *s++ = c[2]; @@ -195,6 +229,7 @@ void FakeCapture::producer_thread_func() video_format.frame_rate_nom = FAKE_FPS; video_format.frame_rate_den = 1; video_format.has_signal = true; + video_format.is_connected = false; FrameAllocator::Frame video_frame = video_frame_allocator->alloc_frame(); if (video_frame.data != nullptr) { @@ -229,3 +264,5 @@ void FakeCapture::producer_thread_func() dequeue_cleanup_callback(); } } + +} // namespace bmusb