X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Frv40dsp.c;h=625ce9ebbdfa340b62e3417526a56eed397604a2;hb=f1b5693027d48a9e448f21595fb9247893c225cf;hp=27bc79eec0d47bbcc177183218909999be2b98a2;hpb=14d5e214bfb56f08b1d34d13758c1e56a59c8c99;p=ffmpeg diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index 27bc79eec0d..625ce9ebbdf 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -26,13 +26,14 @@ #include "avcodec.h" #include "dsputil.h" +#include "rv34dsp.h" #define RV40_LOWPASS(OPNAME, OP) \ static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\ const int h, const int C1, const int C2, const int SHIFT){\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ int i;\ - for(i=0; i> SHIFT);\ OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\ @@ -42,8 +43,8 @@ static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\ OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\ OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\ - dst+=dstStride;\ - src+=srcStride;\ + dst += dstStride;\ + src += srcStride;\ }\ }\ \ @@ -51,21 +52,21 @@ static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt const int w, const int C1, const int C2, const int SHIFT){\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ int i;\ - for(i=0; i> SHIFT);\ OP(dst[1*dstStride], (srcA + src4 - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\ OP(dst[2*dstStride], (src0 + src5 - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\ @@ -105,10 +106,6 @@ static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, i OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\ }\ \ -static void OPNAME ## rv40_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\ -}\ -\ static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\ }\ @@ -119,46 +116,42 @@ static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, i \ static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ }\ \ static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ }\ \ static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ }\ \ -static void OPNAME ## rv40_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\ -}\ -\ static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ }\ \ static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ }\ \ static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ }\ @@ -169,14 +162,14 @@ static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, i \ static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\ }\ \ static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ + uint8_t * const full_mid = full + SIZE*2;\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\ }\ @@ -205,50 +198,50 @@ static const int rv40_bias[4][4] = { #define RV40_CHROMA_MC(OPNAME, OP)\ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ + const int A = (8-x) * (8-y);\ + const int B = ( x) * (8-y);\ + const int C = (8-x) * ( y);\ + const int D = ( x) * ( y);\ int i;\ int bias = rv40_bias[y>>1][x>>1];\ \ assert(x<8 && y<8 && x>=0 && y>=0);\ \ if(D){\ - for(i=0; i>1][x>>1];\ \ assert(x<8 && y<8 && x>=0 && y>=0);\ \ if(D){\ - for(i=0; iput_rv40_qpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0]; - c->put_rv40_qpel_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; - c->put_rv40_qpel_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c; - c->put_rv40_qpel_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c; - c->put_rv40_qpel_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c; - c->put_rv40_qpel_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c; - c->put_rv40_qpel_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c; - c->put_rv40_qpel_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c; - c->put_rv40_qpel_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c; - c->put_rv40_qpel_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c; - c->put_rv40_qpel_pixels_tab[0][10] = put_rv40_qpel16_mc22_c; - c->put_rv40_qpel_pixels_tab[0][11] = put_rv40_qpel16_mc32_c; - c->put_rv40_qpel_pixels_tab[0][12] = put_rv40_qpel16_mc03_c; - c->put_rv40_qpel_pixels_tab[0][13] = put_rv40_qpel16_mc13_c; - c->put_rv40_qpel_pixels_tab[0][14] = put_rv40_qpel16_mc23_c; - c->avg_rv40_qpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0]; - c->avg_rv40_qpel_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c; - c->avg_rv40_qpel_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c; - c->avg_rv40_qpel_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c; - c->avg_rv40_qpel_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c; - c->avg_rv40_qpel_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c; - c->avg_rv40_qpel_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c; - c->avg_rv40_qpel_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c; - c->avg_rv40_qpel_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c; - c->avg_rv40_qpel_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c; - c->avg_rv40_qpel_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c; - c->avg_rv40_qpel_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c; - c->avg_rv40_qpel_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c; - c->avg_rv40_qpel_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c; - c->avg_rv40_qpel_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c; - c->put_rv40_qpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0]; - c->put_rv40_qpel_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c; - c->put_rv40_qpel_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c; - c->put_rv40_qpel_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c; - c->put_rv40_qpel_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c; - c->put_rv40_qpel_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c; - c->put_rv40_qpel_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c; - c->put_rv40_qpel_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c; - c->put_rv40_qpel_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c; - c->put_rv40_qpel_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c; - c->put_rv40_qpel_pixels_tab[1][10] = put_rv40_qpel8_mc22_c; - c->put_rv40_qpel_pixels_tab[1][11] = put_rv40_qpel8_mc32_c; - c->put_rv40_qpel_pixels_tab[1][12] = put_rv40_qpel8_mc03_c; - c->put_rv40_qpel_pixels_tab[1][13] = put_rv40_qpel8_mc13_c; - c->put_rv40_qpel_pixels_tab[1][14] = put_rv40_qpel8_mc23_c; - c->avg_rv40_qpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0]; - c->avg_rv40_qpel_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c; - c->avg_rv40_qpel_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c; - c->avg_rv40_qpel_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c; - c->avg_rv40_qpel_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c; - c->avg_rv40_qpel_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c; - c->avg_rv40_qpel_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c; - c->avg_rv40_qpel_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c; - c->avg_rv40_qpel_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c; - c->avg_rv40_qpel_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c; - c->avg_rv40_qpel_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c; - c->avg_rv40_qpel_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c; - c->avg_rv40_qpel_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c; - c->avg_rv40_qpel_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c; - c->avg_rv40_qpel_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c; +#define RV40_WEIGHT_FUNC(size) \ +static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\ +{\ + int i, j;\ +\ + for (j = 0; j < size; j++) {\ + for (i = 0; i < size; i++)\ + dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\ + src1 += stride;\ + src2 += stride;\ + dst += stride;\ + }\ +} + +RV40_WEIGHT_FUNC(16) +RV40_WEIGHT_FUNC(8) + +av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { + c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; + c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; + c->put_pixels_tab[0][ 2] = dsp->put_h264_qpel_pixels_tab[0][2]; + c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c; + c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c; + c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c; + c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c; + c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c; + c->put_pixels_tab[0][ 8] = dsp->put_h264_qpel_pixels_tab[0][8]; + c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c; + c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c; + c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c; + c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c; + c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c; + c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c; + c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0]; + c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c; + c->avg_pixels_tab[0][ 2] = dsp->avg_h264_qpel_pixels_tab[0][2]; + c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c; + c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c; + c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c; + c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c; + c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c; + c->avg_pixels_tab[0][ 8] = dsp->avg_h264_qpel_pixels_tab[0][8]; + c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c; + c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c; + c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c; + c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c; + c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c; + c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c; + c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; + c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c; + c->put_pixels_tab[1][ 2] = dsp->put_h264_qpel_pixels_tab[1][2]; + c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c; + c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c; + c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c; + c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c; + c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c; + c->put_pixels_tab[1][ 8] = dsp->put_h264_qpel_pixels_tab[1][8]; + c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c; + c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c; + c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c; + c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c; + c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c; + c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c; + c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0]; + c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c; + c->avg_pixels_tab[1][ 2] = dsp->avg_h264_qpel_pixels_tab[1][2]; + c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c; + c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c; + c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c; + c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c; + c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c; + c->avg_pixels_tab[1][ 8] = dsp->avg_h264_qpel_pixels_tab[1][8]; + c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c; + c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c; + c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c; + c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c; + c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c; + c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c; + + c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; + c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c; + c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; + c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; + + c->rv40_weight_pixels_tab[0] = rv40_weight_func_16; + c->rv40_weight_pixels_tab[1] = rv40_weight_func_8; - c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_c; - c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_c; - c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_c; - c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_c; + if (HAVE_MMX) + ff_rv40dsp_init_x86(c, dsp); }