git.sesse.net Git - vlc/commitdiff
deinterlace.c: added AltiVec optims for 16-bytes unaligned lines
author Eric Petit <titer@videolan.org>
Mon, 19 Apr 2004 16:57:39 +0000 (16:57 +0000)
committer Eric Petit <titer@videolan.org>
Mon, 19 Apr 2004 16:57:39 +0000 (16:57 +0000)
modules/video_filter/deinterlace.c

index 9a7b3d01d6b5efcc7cec848ce5d54bccf07482d7..f766d4a6989cd5c6c0615e9828b811c6f5a72b35 100644 (file)
@@ -928,29 +928,64 @@ static void EndMMX( void )
 static void MergeAltivec( void *_p_dest, const void *_p_s1,
                           const void *_p_s2, size_t i_bytes )
 {
-    uint8_t *p_dest = (uint8_t*)_p_dest;
-    const uint8_t *p_s1 = (const uint8_t *)_p_s1;
-    const uint8_t *p_s2 = (const uint8_t *)_p_s2;
-    uint8_t *p_end = p_dest + i_bytes - 16;
+    uint8_t *p_dest = (uint8_t *)_p_dest;
+    uint8_t *p_s1   = (uint8_t *)_p_s1;
+    uint8_t *p_s2   = (uint8_t *)_p_s2;
+    uint8_t *p_end  = p_dest + i_bytes - 15;
 
-    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) |
-        ( (int)p_dest & 0xF ) )
+    /* Use C until the first 16-bytes aligned destination pixel */
+    while( (int)p_dest & 0xF )
     {
-        /* TODO Handle non 16-bytes aligned planes */
-        MergeGeneric( _p_dest, _p_s1, _p_s2, i_bytes );
-        return;
+        *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
     }
 
-    while( p_dest < p_end )
+    if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
     {
-        vec_st( vec_avg( vec_ld( 0, p_s1 ), vec_ld( 0, p_s2 ) ),
-                0, p_dest );
-        p_s1   += 16;
-        p_s2   += 16;
-        p_dest += 16;
+        /* Unaligned source */
+        vector unsigned char s1v, s2v, destv;
+        vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
+        vector unsigned char perm1v, perm2v;
+
+        perm1v = vec_lvsl( 0, p_s1 );
+        perm2v = vec_lvsl( 0, p_s2 );
+        s1oldv = vec_ld( 0, p_s1 );
+        s2oldv = vec_ld( 0, p_s2 );
+
+        while( p_dest < p_end )
+        {
+            s1newv = vec_ld( 16, p_s1 );
+            s2newv = vec_ld( 16, p_s2 );
+            s1v    = vec_perm( s1oldv, s1newv, perm1v );
+            s2v    = vec_perm( s2oldv, s2newv, perm2v );
+            s1oldv = s1newv;
+            s2oldv = s2newv;
+            destv  = vec_avg( s1v, s2v );
+            vec_st( destv, 0, p_dest );
+
+            p_s1   += 16;
+            p_s2   += 16;
+            p_dest += 16;
+        }
     }
+    else
+    {
+        /* Aligned source */
+        vector unsigned char s1v, s2v, destv;
 
-    p_end += 16;
+        while( p_dest < p_end )
+        {
+            s1v   = vec_ld( 0, p_s1 );
+            s2v   = vec_ld( 0, p_s2 );
+            destv = vec_avg( s1v, s2v );
+            vec_st( destv, 0, p_dest );
+
+            p_s1   += 16;
+            p_s2   += 16;
+            p_dest += 16;
+        }
+    }
+
+    p_end += 15;
 
     while( p_dest < p_end )
     {