}
}
+#ifdef WORDS_BIGENDIAN
+#define VSLD(a,b,n) vec_sld(a,b,n)
+#else /* !WORDS_BIGENDIAN: operands are swapped and the shift count mirrored to 16-(n) */
+#define VSLD(a,b,n) vec_sld(b,a,16-(n)) /* (n) is parenthesized so an expression argument is subtracted as a whole */
+#endif
+
static void mc_chroma_altivec_4xh( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
uint8_t *src, intptr_t i_src_stride,
int mvx, int mvy, int i_height )
vec_u16_t src0v_16, src1v_16, src2v_16, src3v_16, dstv16;
vec_u16_t shiftv, k32v;
+#ifdef WORDS_BIGENDIAN
static const vec_u8_t perm0v = CV(1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13);
static const vec_u8_t perm1v = CV(3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15);
+#else
+ static const vec_u8_t perm0v = CV(0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12);
+ static const vec_u8_t perm1v = CV(2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14);
+#endif
coeff0v = vec_ld( 0, coeff );
coeff3v = vec_splat( coeff0v, 3 );
VEC_LOAD( src, src2v_8, 9, vec_u8_t, src );
src2v_16 = vec_u8_to_u16( src2v_8 );
- src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) );
+ src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) );
for( int y = 0; y < i_height; y += 2 )
{
src1v_16 = src3v_16;
VEC_LOAD( srcp, src2v_8, 9, vec_u8_t, src );
src2v_16 = vec_u8_to_u16( src2v_8 );
- src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) );
+ src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) );
dstv16 = vec_mladd( coeff0v, src0v_16, k32v );
dstv16 = vec_mladd( coeff1v, src1v_16, dstv16 );
src1v_16 = src3v_16;
VEC_LOAD( srcp, src2v_8, 9, vec_u8_t, src );
src2v_16 = vec_u8_to_u16( src2v_8 );
- src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) );
+ src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) );
dstv16 = vec_mladd( coeff0v, src0v_16, k32v );
dstv16 = vec_mladd( coeff1v, src1v_16, dstv16 );
k32v = vec_sl( vec_splat_u16( 1 ), vec_splat_u16( 5 ) );
shiftv = vec_splat_u16( 6 );
+#ifdef WORDS_BIGENDIAN
static const vec_u8_t perm0v = CV(1,5,9,13,17,21,25,29,0,0,0,0,0,0,0,0);
static const vec_u8_t perm1v = CV(3,7,11,15,19,23,27,31,0,0,0,0,0,0,0,0);
+#else
+ static const vec_u8_t perm0v = CV(0,4,8,12,16,20,24,28,1,1,1,1,1,1,1,1);
+ static const vec_u8_t perm1v = CV(2,6,10,14,18,22,26,30,1,1,1,1,1,1,1,1);
+#endif
VEC_LOAD( src, src2v_8, 16, vec_u8_t, src );
VEC_LOAD( src+16, src3v_8, 2, vec_u8_t, src );
- src3v_8 = vec_sld( src2v_8, src3v_8, 2 );
+ src3v_8 = VSLD( src2v_8, src3v_8, 2 );
for( int y = 0; y < i_height; y += 2 )
{
VEC_LOAD( srcp, src2v_8, 16, vec_u8_t, src );
VEC_LOAD( srcp+16, src3v_8, 2, vec_u8_t, src );
- src3v_8 = vec_sld( src2v_8, src3v_8, 2 );
+ src3v_8 = VSLD( src2v_8, src3v_8, 2 );
src0v_16h = vec_u8_to_u16_h( src0v_8 );
src0v_16l = vec_u8_to_u16_l( src0v_8 );
VEC_LOAD( srcp, src2v_8, 16, vec_u8_t, src );
VEC_LOAD( srcp+16, src3v_8, 2, vec_u8_t, src );
- src3v_8 = vec_sld( src2v_8, src3v_8, 2 );
+ src3v_8 = VSLD( src2v_8, src3v_8, 2 );
src0v_16h = vec_u8_to_u16_h( src0v_8 );
src0v_16l = vec_u8_to_u16_l( src0v_8 );
VEC_LOAD_G( &src[x- 2+i_stride*y], src1v, 16, vec_u8_t); \
VEC_LOAD_G( &src[x+14+i_stride*y], src6v, 16, vec_u8_t); \
\
- src2v = vec_sld( src1v, src6v, 1 ); \
- src3v = vec_sld( src1v, src6v, 2 ); \
- src4v = vec_sld( src1v, src6v, 3 ); \
- src5v = vec_sld( src1v, src6v, 4 ); \
- src6v = vec_sld( src1v, src6v, 5 ); \
+ src2v = VSLD( src1v, src6v, 1 ); \
+ src3v = VSLD( src1v, src6v, 2 ); \
+ src4v = VSLD( src1v, src6v, 3 ); \
+ src5v = VSLD( src1v, src6v, 4 ); \
+ src6v = VSLD( src1v, src6v, 5 ); \
\
temp1v = vec_u8_to_s16_h( src1v ); \
temp2v = vec_u8_to_s16_h( src2v ); \
#define HPEL_FILTER_CENTRAL() \
{ \
- temp1v = vec_sld( tempav, tempbv, 12 ); \
- temp2v = vec_sld( tempav, tempbv, 14 ); \
+ temp1v = VSLD( tempav, tempbv, 12 ); \
+ temp2v = VSLD( tempav, tempbv, 14 ); \
temp3v = tempbv; \
- temp4v = vec_sld( tempbv, tempcv, 2 ); \
- temp5v = vec_sld( tempbv, tempcv, 4 ); \
- temp6v = vec_sld( tempbv, tempcv, 6 ); \
+ temp4v = VSLD( tempbv, tempcv, 2 ); \
+ temp5v = VSLD( tempbv, tempcv, 4 ); \
+ temp6v = VSLD( tempbv, tempcv, 6 ); \
\
HPEL_FILTER_2( temp1v, temp2v, temp3v, \
temp4v, temp5v, temp6v ); \
dest1v = vec_add( temp1v, thirtytwov ); \
dest1v = vec_sra( dest1v, sixv ); \
\
- temp1v = vec_sld( tempbv, tempcv, 12 ); \
- temp2v = vec_sld( tempbv, tempcv, 14 ); \
+ temp1v = VSLD( tempbv, tempcv, 12 ); \
+ temp2v = VSLD( tempbv, tempcv, 14 ); \
temp3v = tempcv; \
- temp4v = vec_sld( tempcv, tempdv, 2 ); \
- temp5v = vec_sld( tempcv, tempdv, 4 ); \
- temp6v = vec_sld( tempcv, tempdv, 6 ); \
+ temp4v = VSLD( tempcv, tempdv, 2 ); \
+ temp5v = VSLD( tempcv, tempdv, 4 ); \
+ temp6v = VSLD( tempcv, tempdv, 6 ); \
\
HPEL_FILTER_2( temp1v, temp2v, temp3v, \
temp4v, temp5v, temp6v ); \
vec_u8_t lv, hv, src1p1v;
vec_u8_t avg0v, avg1v, avghv, avghp1v, avgleftv, avgrightv;
static const vec_u8_t inverse_bridge_shuffle = CV(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E );
+#ifndef WORDS_BIGENDIAN
+ static const vec_u8_t inverse_bridge_shuffle_1 = CV(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F );
+#endif
for( int y = 0; y < height; y++ )
{
src1p1v = vec_ld(16*(x*2+2), src1);
avghp1v = vec_avg(lv, src1p1v);
- avgleftv = vec_avg(vec_sld(avg0v, avghv, 1), avg0v);
- avgrightv = vec_avg(vec_sld(avghv, avghp1v, 1), avghv);
+ avgleftv = vec_avg(VSLD(avg0v, avghv, 1), avg0v);
+ avgrightv = vec_avg(VSLD(avghv, avghp1v, 1), avghv);
vec_st(vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle), 16*x, dst0);
+#ifdef WORDS_BIGENDIAN
vec_st((vec_u8_t)vec_pack((vec_u16_t)avgleftv,(vec_u16_t)avgrightv), 16*x, dsth);
+#else
+ vec_st(vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1), 16*x, dsth);
+#endif
avg0v = avghp1v;
hv = vec_ld(16*(x*2+2), src2);
avghp1v = vec_avg(src1p1v, hv);
- avgleftv = vec_avg(vec_sld(avg1v, avghv, 1), avg1v);
- avgrightv = vec_avg(vec_sld(avghv, avghp1v, 1), avghv);
+ avgleftv = vec_avg(VSLD(avg1v, avghv, 1), avg1v);
+ avgrightv = vec_avg(VSLD(avghv, avghp1v, 1), avghv);
vec_st(vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle), 16*x, dstv);
+#ifdef WORDS_BIGENDIAN
vec_st((vec_u8_t)vec_pack((vec_u16_t)avgleftv,(vec_u16_t)avgrightv), 16*x, dstc);
+#else
+ vec_st(vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1), 16*x, dstc);
+#endif
avg1v = avghp1v;
lv = vec_ld(16*(x*2+1), src2);
avghp1v = vec_avg(src1v, lv);
- avgleftv = vec_avg(vec_sld(avg0v, avghv, 1), avg0v);
- avgrightv = vec_avg(vec_sld(avg1v, avghp1v, 1), avg1v);
+ avgleftv = vec_avg(VSLD(avg0v, avghv, 1), avg0v);
+ avgrightv = vec_avg(VSLD(avg1v, avghp1v, 1), avg1v);
lv = vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle);
+#ifdef WORDS_BIGENDIAN
hv = (vec_u8_t)vec_pack((vec_u16_t)avgleftv,(vec_u16_t)avgrightv);
+#else
+ hv = vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1);
+#endif
vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dst0);
vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dst0);