static void MergeAltivec( void *_p_dest, const void *_p_s1,
const void *_p_s2, size_t i_bytes )
{
- uint8_t *p_dest = (uint8_t*)_p_dest;
- const uint8_t *p_s1 = (const uint8_t *)_p_s1;
- const uint8_t *p_s2 = (const uint8_t *)_p_s2;
- uint8_t *p_end = p_dest + i_bytes - 16;
+ uint8_t *p_dest = (uint8_t *)_p_dest;
+ uint8_t *p_s1 = (uint8_t *)_p_s1;
+ uint8_t *p_s2 = (uint8_t *)_p_s2;
+ uint8_t *p_end = p_dest + i_bytes - 15;
- if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) |
- ( (int)p_dest & 0xF ) )
+ /* Use C until the first 16-bytes aligned destination pixel */
+ while( (int)p_dest & 0xF )
{
- /* TODO Handle non 16-bytes aligned planes */
- MergeGeneric( _p_dest, _p_s1, _p_s2, i_bytes );
- return;
+ *p_dest++ = ( (uint16_t)(*p_s1++) + (uint16_t)(*p_s2++) ) >> 1;
}
- while( p_dest < p_end )
+ if( ( (int)p_s1 & 0xF ) | ( (int)p_s2 & 0xF ) )
{
- vec_st( vec_avg( vec_ld( 0, p_s1 ), vec_ld( 0, p_s2 ) ),
- 0, p_dest );
- p_s1 += 16;
- p_s2 += 16;
- p_dest += 16;
+ /* Unaligned source */
+ vector unsigned char s1v, s2v, destv;
+ vector unsigned char s1oldv, s2oldv, s1newv, s2newv;
+ vector unsigned char perm1v, perm2v;
+
+ perm1v = vec_lvsl( 0, p_s1 );
+ perm2v = vec_lvsl( 0, p_s2 );
+ s1oldv = vec_ld( 0, p_s1 );
+ s2oldv = vec_ld( 0, p_s2 );
+
+ while( p_dest < p_end )
+ {
+ s1newv = vec_ld( 16, p_s1 );
+ s2newv = vec_ld( 16, p_s2 );
+ s1v = vec_perm( s1oldv, s1newv, perm1v );
+ s2v = vec_perm( s2oldv, s2newv, perm2v );
+ s1oldv = s1newv;
+ s2oldv = s2newv;
+ destv = vec_avg( s1v, s2v );
+ vec_st( destv, 0, p_dest );
+
+ p_s1 += 16;
+ p_s2 += 16;
+ p_dest += 16;
+ }
}
+ else
+ {
+ /* Aligned source */
+ vector unsigned char s1v, s2v, destv;
- p_end += 16;
+ while( p_dest < p_end )
+ {
+ s1v = vec_ld( 0, p_s1 );
+ s2v = vec_ld( 0, p_s2 );
+ destv = vec_avg( s1v, s2v );
+ vec_st( destv, 0, p_dest );
+
+ p_s1 += 16;
+ p_s2 += 16;
+ p_dest += 16;
+ }
+ }
+
+ p_end += 15;
while( p_dest < p_end )
{