git.sesse.net Git - mlt/commitdiff
implement SSE optimized swab function
author Maksym Veremeyenko <verem@m1.tv>
Thu, 13 Feb 2014 14:27:23 +0000 (16:27 +0200)
committer Dan Dennedy <dan@dennedy.org>
Sun, 16 Feb 2014 18:38:14 +0000 (10:38 -0800)
src/modules/decklink/common.cpp
src/modules/decklink/common.h
src/modules/decklink/consumer_decklink.cpp
src/modules/decklink/producer_decklink.cpp

index 83f5e531b099fb103e8dc724a42975d56ef67028..cafdda9f18964f257bd5d218e0cf0207f558fb81 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "common.h"
 #include <stdlib.h>
+#include <unistd.h>
 
 #ifdef __DARWIN__
 
@@ -89,3 +90,46 @@ void freeDLString( DLString aDLString )
 
 #endif
 
+
+void swab2( const void *from, void *to, int n )
+{
+#if defined(USE_SSE)
+#define SWAB_STEP 16
+       __asm__ volatile
+       (
+               "loop_start:                            \n\t"
+
+               /* load */
+               "movdqa         0(%[from]), %%xmm0      \n\t"
+               "add            $0x10, %[from]          \n\t"
+
+               /* duplicate to temp registers */
+               "movdqa         %%xmm0, %%xmm1          \n\t"
+
+               /* shift right temp register */
+               "psrlw          $8, %%xmm1              \n\t"
+
+               /* shift left main register */
+               "psllw          $8, %%xmm0              \n\t"
+
+               /* compose them back */
+               "por           %%xmm0, %%xmm1           \n\t"
+
+               /* save */
+               "movdqa         %%xmm1, 0(%[to])        \n\t"
+               "add            $0x10, %[to]            \n\t"
+
+               "dec            %[cnt]                  \n\t"
+               "jnz            loop_start              \n\t"
+
+               :
+               : [from]"r"(from), [to]"r"(to), [cnt]"r"(n / SWAB_STEP)
+               : "xmm0", "xmm1"
+       );
+
+       from = (unsigned char*) from + n - (n % SWAB_STEP);
+       to = (unsigned char*) to + n - (n % SWAB_STEP);
+       n = (n % SWAB_STEP);
+#endif
+       swab(from, to, n);
+};
index 3b48b9c01361c8d213445790a81371faf61b3dc7..98a853617bd01d4e3984fbcacfa2c1ad7e101d46 100644 (file)
@@ -38,5 +38,6 @@
 char* getCString( DLString aDLString );
 void freeCString( char* aCString );
 void freeDLString( DLString aDLString );
+void swab2( const void *from, void *to, int n );
 
 #endif // DECKLINK_COMMON_H
index 8f01dea509d0d4574761151c6bcf5d77a1dff724..ac6f5a92491c1743d668ffb6d5eb5acbc8133415 100644 (file)
@@ -416,9 +416,9 @@ public:
                                        // Normal non-keyer playout - needs byte swapping
                                        if ( !progressive && m_displayMode->GetFieldDominance() == bmdUpperFieldFirst )
                                                // convert lower field first to top field first
-                                               swab( (char*) image, (char*) buffer + stride, stride * ( height - 1 ) );
+                                               swab2( (char*) image, (char*) buffer + stride, stride * ( height - 1 ) );
                                        else
-                                               swab( (char*) image, (char*) buffer, stride * height );
+                                               swab2( (char*) image, (char*) buffer, stride * height );
                                }
                                else if ( !mlt_properties_get_int( MLT_FRAME_PROPERTIES( frame ), "test_image" ) )
                                {
index 94348938ef0e5975e2716023abfb29acddb9c318..6801fad8deb980b9fafdd0f64f81f221c7cad0e4 100644 (file)
@@ -432,7 +432,7 @@ public:
                                                for ( int i = 1; i < m_vancLines + 1; i++ )
                                                {
                                                        if ( vanc->GetBufferForVerticalBlankingLine( i, &buffer ) == S_OK )
-                                                               swab( (char*) buffer, (char*) image + ( i - 1 ) * video->GetRowBytes(), video->GetRowBytes() );
+                                                               swab2( (char*) buffer, (char*) image + ( i - 1 ) * video->GetRowBytes(), video->GetRowBytes() );
                                                        else
                                                                mlt_log_debug( getProducer(), "failed capture vanc line %d\n", i );
                                                }
@@ -445,7 +445,7 @@ public:
                                if ( image && buffer )
                                {
                                        size =  video->GetRowBytes() * video->GetHeight();
-                                       swab( (char*) buffer, (char*) image + m_vancLines * video->GetRowBytes(), size );
+                                       swab2( (char*) buffer, (char*) image + m_vancLines * video->GetRowBytes(), size );
                                        mlt_frame_set_image( frame, (uint8_t*) image, size, mlt_pool_release );
                                }
                                else if ( image )