X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fdeblock.c;h=83bda62091f2ee9ca64dc9d82d8e48a75f545710;hb=3c66591e859045ef79a7131b991a5f20c80ffbb4;hp=84201c6b68a6aa6c525ce06f4d179d3591fa925f;hpb=0122fd230cbf7351845dd354d5ee883d741222ef;p=x264 diff --git a/common/deblock.c b/common/deblock.c index 84201c6b..83bda620 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: deblocking ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -612,8 +612,10 @@ void x264_macroblock_deblock( x264_t *h ) uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength; if( intra_cur ) { - memset( &bs[0][1], 3, 3*4*sizeof(uint8_t) ); - memset( &bs[1][1], 3, 3*4*sizeof(uint8_t) ); + M32( bs[0][1] ) = 0x03030303; + M64( bs[0][2] ) = 0x0303030303030303ULL; + M32( bs[1][1] ) = 0x03030303; + M64( bs[1][2] ) = 0x0303030303030303ULL; } else h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, @@ -737,13 +739,31 @@ void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int b void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, int bframe ); -#if ARCH_AARCH64 void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#if ARCH_AARCH64 void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#endif +#endif + +#if !HIGH_BIT_DEPTH +#if HAVE_MSA +void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, + int bframe ); #endif #endif @@ -853,17 +873,34 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) pf->deblock_luma[0] = x264_deblock_h_luma_neon; pf->deblock_chroma[1] = x264_deblock_v_chroma_neon; pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon; + pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon; #if ARCH_AARCH64 pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon; pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon; pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon; - pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon; pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon; pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon; + pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_neon; + pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_neon; #endif pf->deblock_strength = x264_deblock_strength_neon; } #endif + +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf->deblock_luma[1] = x264_deblock_v_luma_msa; + pf->deblock_luma[0] = x264_deblock_h_luma_msa; + pf->deblock_chroma[1] = x264_deblock_v_chroma_msa; + pf->deblock_h_chroma_420 = x264_deblock_h_chroma_msa; + pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_msa; + pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_msa; + pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_msa; + pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_msa; + pf->deblock_strength = x264_deblock_strength_msa; + } +#endif #endif // !HIGH_BIT_DEPTH /* These functions are equivalent, so don't duplicate them. */