X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp3dsp.c;h=ec859a679d0beffd9fd3d917140c7c28e97042ff;hb=d40bb518b50561db60ef71ab0e37eb7f3fb9043b;hp=fdaa292d3717b62897c0a718f855be76da488d05;hpb=c0a647644f2703e1da980dcf988cefd81528d8c9;p=ffmpeg diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index fdaa292d371..ec859a679d0 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -27,6 +27,7 @@ #include "libavutil/attributes.h" #include "libavutil/common.h" #include "libavutil/intreadwrite.h" +#include "libavutil/avassert.h" #include "avcodec.h" #include "rnd_avg.h" @@ -194,6 +195,158 @@ static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, } } +/* Inverse DCT variant that uses four input taps per 1-D transform. First pass: for i = 0..3 it reads input[i], input[i + 8], input[i + 16], input[i + 24] and, unless all four are zero, rewrites input[i + 8 * k] for k = 0..7. Second pass: for i = 0..7 it reads input[8 * i + 0..3] and produces the eight pixels dst[i + k * stride]. type == 1 stores the clipped result biased by +128 into dst, any other type adds the result to the pixels already in dst. NOTE(review): entries that the first pass leaves untouched are still read by the second pass, so they are presumably expected to be zero on entry - confirm against the callers. */ static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride, + int16_t *input, int type) +{ + int16_t *ip = input; + + int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; + int Ed, Gd, Add, Bdd, Fd, Hd; + + int i; + + /* Inverse DCT on the rows now */ + for (i = 0; i < 4; i++) { + /* Check for non-zero values */ + if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) { + A = M(xC1S7, ip[1 * 8]); + B = M(xC7S1, ip[1 * 8]); + C = M(xC3S5, ip[3 * 8]); + D = -M(xC5S3, ip[3 * 8]); + + Ad = M(xC4S4, (A - C)); + Bd = M(xC4S4, (B - D)); + + Cd = A + C; + Dd = B + D; + + E = M(xC4S4, ip[0 * 8]); + F = E; + + G = M(xC2S6, ip[2 * 8]); + H = M(xC6S2, ip[2 * 8]); + + Ed = E - G; + Gd = E + G; + + Add = F + Ad; + Bdd = Bd - H; + + Fd = F - Ad; + Hd = Bd + H; + + /* Final sequence of operations over-write original inputs */ + ip[0 * 8] = Gd + Cd; + ip[7 * 8] = Gd - Cd; + + ip[1 * 8] = Add + Hd; + ip[2 * 8] = Add - Hd; + + ip[3 * 8] = Ed + Dd; + ip[4 * 8] = Ed - Dd; + + ip[5 * 8] = Fd + Bdd; + ip[6 * 8] = Fd - Bdd; + + } + + ip += 1; + } + + ip = input; + /* Second pass: each iteration turns ip[0..3] into the eight pixels dst[0 * stride] .. dst[7 * stride], then advances ip by 8 and dst by 1. */ + for (i = 0; i < 8; i++) { + /* Check for non-zero values (bitwise or faster than ||) */ + if (ip[0] | ip[1] | ip[2] | ip[3]) { + A = M(xC1S7, ip[1]); + B = M(xC7S1, ip[1]); + C = M(xC3S5, ip[3]); + D = -M(xC5S3, ip[3]); + + Ad = M(xC4S4, (A - C)); + Bd =
M(xC4S4, (B - D)); + + Cd = A + C; + Dd = B + D; + + E = M(xC4S4, ip[0]); + if (type == 1) + E += 16 * 128; /* +128 output bias, pre-scaled for the final >> 4 */ + F = E; + + G = M(xC2S6, ip[2]); + H = M(xC6S2, ip[2]); + + Ed = E - G; + Gd = E + G; + + Add = F + Ad; + Bdd = Bd - H; + + Fd = F - Ad; + Hd = Bd + H; + + Gd += 8; /* each output sum below contains exactly one of these four terms, so every result gets +8 before its >> 4 */ + Add += 8; + Ed += 8; + Fd += 8; + + /* Store the eight results into dst; the coefficients in ip are not modified by this pass. */ + if (type == 1) { + dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); + dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); + + dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); + dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); + + dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); + dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); + + dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); + dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); + } else { + dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); + dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); + + dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); + dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); + + dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); + dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); + + dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); + dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); + } + } else { /* all four taps are zero: put mode stores the bare 128 bias, add mode leaves dst unchanged */ + if (type == 1) { + dst[0*stride] = + dst[1*stride] = + dst[2*stride] = + dst[3*stride] = + dst[4*stride] = + dst[5*stride] = + dst[6*stride] = + dst[7*stride] = 128; + } + } + + ip += 8; + dst++; + } +} + /* Runs idct10() with type 1 (store into dest), then clears all 64 entries of block. */ +void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block) +{ + idct10(dest, stride, block, 1); + memset(block, 0, sizeof(*block) * 64); +} + /* Runs idct10() with type 2 (add onto the pixels in dest), then clears all 64 entries of block. */ +void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block) +{ + idct10(dest, stride, block, 2); + memset(block, 0, sizeof(*block) * 64); +} + static void vp3_idct_put_c(uint8_t *dest /* align 8 */,
ptrdiff_t stride, int16_t *block /* align 16 */) { @@ -227,14 +380,14 @@ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, block[0] = 0; } -static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, - int *bounding_values) +static av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, + int *bounding_values, int count) /* count: number of loop iterations; first_pixel advances by 1 each time */ { unsigned char *end; int filter_value; const ptrdiff_t nstride = -stride; - for (end = first_pixel + 8; first_pixel < end; first_pixel++) { + for (end = first_pixel + count; first_pixel < end; first_pixel++) { filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + (first_pixel[0] - first_pixel[nstride]) * 3; filter_value = bounding_values[(filter_value + 4) >> 3]; @@ -244,13 +397,13 @@ static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, } } -static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, - int *bounding_values) +static av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, + int *bounding_values, int count) /* count: number of loop iterations; first_pixel advances by stride each time */ { unsigned char *end; int filter_value; - for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) { + for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) { filter_value = (first_pixel[-2] - first_pixel[1]) + (first_pixel[ 0] - first_pixel[-1]) * 3; filter_value = bounding_values[(filter_value + 4) >> 3]; @@ -260,6 +413,18 @@ static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, } } +#define LOOP_FILTER(prefix, suffix, dim, count) \ +void prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \ + int *bounding_values) \ +{ \ + vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \ +} + /* LOOP_FILTER(prefix, suffix, dim, count) defines a function named prefix, "_", dim, "_loop_filter_", count, suffix that calls vp3_<dim>_loop_filter_c() with that fixed count. The count-8 variants below are static, the count-12 variants have external linkage. */ +static LOOP_FILTER(vp3,_c, v, 8) +static LOOP_FILTER(vp3,_c, h, 8) +LOOP_FILTER(ff_vp3dsp, , v, 12) +LOOP_FILTER(ff_vp3dsp, , h, 12) + static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t
*src1, const uint8_t *src2, ptrdiff_t stride, int h) { @@ -284,8 +449,8 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) c->idct_put = vp3_idct_put_c; c->idct_add = vp3_idct_add_c; c->idct_dc_add = vp3_idct_dc_add_c; - c->v_loop_filter = vp3_v_loop_filter_c; - c->h_loop_filter = vp3_h_loop_filter_c; + c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c; + c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c; if (ARCH_ARM) ff_vp3dsp_init_arm(c, flags); @@ -293,4 +458,37 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) ff_vp3dsp_init_ppc(c, flags); if (ARCH_X86) ff_vp3dsp_init_x86(c, flags); + if (ARCH_MIPS) + ff_vp3dsp_init_mips(c, flags); +} + +/* + * This function initializes the loop filter boundary limits if the frame's + * quality index is different from the previous frame's. + * + * bounding_values_array must hold at least 258 ints: elements 0..255 are cleared and filled as the lookup table, elements 256 and 257 are written as well. + * + * filter_limit must be in the range 0..127 (checked with av_assert0). + */ +void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit) +{ + int *bounding_values = bounding_values_array + 127; /* index 0 of the table is array element 127, so indices down to -127 are valid */ + int x; + int value; + + av_assert0(filter_limit < 128U); /* unsigned comparison: negative limits fail the assertion too */ + + /* set up the bounding values */ + memset(bounding_values_array, 0, 256 * sizeof(int)); + for (x = 0; x < filter_limit; x++) { /* identity ramp for |x| < filter_limit */ + bounding_values[-x] = -x; + bounding_values[x] = x; + } + for (x = value = filter_limit; x < 128 && value; x++, value--) { /* ramp back down towards zero, stopping at |x| = 127 */ + bounding_values[ x] = value; + bounding_values[-x] = -value; + } + if (value) /* the ramp was cut off at x == 128 before reaching zero */ + bounding_values[128] = value; + bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202U; /* array elements 256 and 257: 2 * filter_limit replicated in each of the four low bytes */ }