X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp8.c;h=8de8968d6ea24215fcac5b8214f4845be085d61d;hb=b0294c80d3a3981ec7ef9e4654962780e8566075;hp=ef4e270f6c40344cdcd870a085ba0028aafaf85f;hpb=2e279598793133ee9c57fd0026d672f076fde4bf;p=ffmpeg diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index ef4e270f6c4..8de8968d6ea 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -162,13 +162,14 @@ typedef struct { /** * filter strength adjustment for the following macroblock modes: - * [0] - i4x4 - * [1] - zero mv - * [2] - inter modes except for zero or split mv - * [3] - split mv + * [0-3] - i16x16 (always zero) + * [4] - i4x4 + * [5] - zero mv + * [6] - inter modes except for zero or split mv + * [7] - split mv * i16x16 modes never have any adjustment */ - int8_t mode[4]; + int8_t mode[VP8_MVMODE_SPLIT+1]; /** * filter strength adjustment for macroblocks that reference: @@ -278,7 +279,7 @@ static void update_lf_deltas(VP8Context *s) for (i = 0; i < 4; i++) s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); - for (i = 0; i < 4; i++) + for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); } @@ -527,68 +528,6 @@ void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) ((s->mb_height - 1 - mb_y) << 6) + MARGIN); } -static av_always_inline -void find_near_mvs(VP8Context *s, VP8Macroblock *mb, - VP56mv near[2], VP56mv *best, uint8_t cnt[4]) -{ - VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, - mb - 1 /* left */, - mb + 1 /* top-left */ }; - enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; - VP56mv near_mv[4] = {{ 0 }}; - enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; - int idx = CNT_ZERO; - int best_idx = CNT_ZERO; - int cur_sign_bias = s->sign_bias[mb->ref_frame]; - int *sign_bias = s->sign_bias; - - /* Process MB on top, left and top-left */ - #define MV_EDGE_CHECK(n)\ - {\ - VP8Macroblock *edge = mb_edge[n];\ - int edge_ref = edge->ref_frame;\ - if (edge_ref != VP56_FRAME_CURRENT) {\ - uint32_t mv = AV_RN32A(&edge->mv);\ - if (mv) {\ - if (cur_sign_bias != sign_bias[edge_ref]) {\ - /* SWAR negate of the values in mv. */\ - mv = ~mv;\ - mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ - }\ - if (!n || mv != AV_RN32A(&near_mv[idx]))\ - AV_WN32A(&near_mv[++idx], mv);\ - cnt[idx] += 1 + (n != 2);\ - } else\ - cnt[CNT_ZERO] += 1 + (n != 2);\ - }\ - } - MV_EDGE_CHECK(0) - MV_EDGE_CHECK(1) - MV_EDGE_CHECK(2) - - /* If we have three distinct MVs, merge first and last if they're the same */ - if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) - cnt[CNT_NEAREST] += 1; - - cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + - (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + - (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); - - /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { - FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); - FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); - } - - /* Choose the best mv out of 0,0 and the nearest mv */ - if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) - best_idx = CNT_NEAREST; - - mb->mv = near_mv[best_idx]; - near[0] = near_mv[CNT_NEAREST]; - near[1] = near_mv[CNT_NEAR]; -} - /** * Motion vector coding, 17.1. */ @@ -698,6 +637,96 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) return num; } +static av_always_inline +void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) +{ + VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, + mb - 1 /* left */, + mb + 1 /* top-left */ }; + enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; + enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; + int idx = CNT_ZERO; + int cur_sign_bias = s->sign_bias[mb->ref_frame]; + int *sign_bias = s->sign_bias; + VP56mv near_mv[4]; + uint8_t cnt[4] = { 0 }; + VP56RangeCoder *c = &s->c; + + AV_ZERO32(&near_mv[0]); + AV_ZERO32(&near_mv[1]); + AV_ZERO32(&near_mv[2]); + + /* Process MB on top, left and top-left */ + #define MV_EDGE_CHECK(n)\ + {\ + VP8Macroblock *edge = mb_edge[n];\ + int edge_ref = edge->ref_frame;\ + if (edge_ref != VP56_FRAME_CURRENT) {\ + uint32_t mv = AV_RN32A(&edge->mv);\ + if (mv) {\ + if (cur_sign_bias != sign_bias[edge_ref]) {\ + /* SWAR negate of the values in mv. */\ + mv = ~mv;\ + mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ + }\ + if (!n || mv != AV_RN32A(&near_mv[idx]))\ + AV_WN32A(&near_mv[++idx], mv);\ + cnt[idx] += 1 + (n != 2);\ + } else\ + cnt[CNT_ZERO] += 1 + (n != 2);\ + }\ + } + + MV_EDGE_CHECK(0) + MV_EDGE_CHECK(1) + MV_EDGE_CHECK(2) + + mb->partitioning = VP8_SPLITMVMODE_NONE; + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { + mb->mode = VP8_MVMODE_MV; + + /* If we have three distinct MVs, merge first and last if they're the same */ + if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) + cnt[CNT_NEAREST] += 1; + + /* Swap near and nearest if necessary */ + if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { + FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); + FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); + } + + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { + + /* Choose the best mv out of 0,0 and the nearest mv */ + clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])], mb_x, mb_y); + cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + + (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + + (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); + + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { + mb->mode = VP8_MVMODE_SPLIT; + mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; + } else { + mb->mv.y += read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += read_mv_component(c, s->prob->mvc[1]); + mb->bmv[0] = mb->mv; + } + } else { + clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR], mb_x, mb_y); + mb->bmv[0] = mb->mv; + } + } else { + clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST], mb_x, mb_y); + mb->bmv[0] = mb->mv; + } + } else { + mb->mode = VP8_MVMODE_ZERO; + AV_ZERO32(&mb->mv); + mb->bmv[0] = mb->mv; + } +} + static av_always_inline void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, int mb_x, int keyframe) @@ -748,9 +777,6 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_ s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); mb->ref_frame = VP56_FRAME_CURRENT; } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { - VP56mv near[2], best; - uint8_t cnt[4] = { 0 }; - // inter MB, 16.2 if (vp56_rac_get_prob_branchy(c, s->prob->last)) mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? @@ -760,36 +786,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_ s->ref_count[mb->ref_frame-1]++; // motion vectors, 16.3 - find_near_mvs(s, mb, near, &best, cnt); - if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) { - if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) { - if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) { - if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) { - mb->mode = VP8_MVMODE_SPLIT; - clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); - mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; - } else { - mb->mode = VP8_MVMODE_NEW; - clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); - mb->mv.y += read_mv_component(c, s->prob->mvc[0]); - mb->mv.x += read_mv_component(c, s->prob->mvc[1]); - } - } else { - mb->mode = VP8_MVMODE_NEAR; - clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); - } - } else { - mb->mode = VP8_MVMODE_NEAREST; - clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); - } - } else { - mb->mode = VP8_MVMODE_ZERO; - AV_ZERO32(&mb->mv); - } - if (mb->mode != VP8_MVMODE_SPLIT) { - mb->partitioning = VP8_SPLITMVMODE_NONE; - mb->bmv[0] = mb->mv; - } + decode_mvs(s, mb, mb_x, mb_y); } else { // intra MB, 16.1 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); @@ -1193,6 +1190,13 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, s->filter.simple, 0); } +static const uint8_t subpel_idx[3][8] = { + { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, + // also function pointer index + { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required + { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels +}; + /** * Generic MC function. * @@ -1211,32 +1215,26 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, * @param mc_func motion compensation function pointers (bilinear or sixtap MC) */ static av_always_inline -void vp8_mc(VP8Context *s, int luma, - uint8_t *dst, uint8_t *src, const VP56mv *mv, - int x_off, int y_off, int block_w, int block_h, - int width, int height, int linesize, - vp8_mc_func mc_func[3][3]) +void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, int linesize, + vp8_mc_func mc_func[3][3]) { if (AV_RN32A(mv)) { - static const uint8_t idx[3][8] = { - { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, - // also function pointer index - { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required - { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels - }; - int mx = (mv->x << luma)&7, mx_idx = idx[0][mx]; - int my = (mv->y << luma)&7, my_idx = idx[0][my]; - x_off += mv->x >> (3 - luma); - y_off += mv->y >> (3 - luma); + int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; + int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 2; + y_off += mv->y >> 2; // edge emulation src += y_off * linesize + x_off; - if (x_off < mx_idx || x_off >= width - block_w - idx[2][mx] || - y_off < my_idx || y_off >= height - block_h - idx[2][my]) { + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, - block_w + idx[1][mx], block_h + idx[1][my], - x_off - mx_idx, y_off - my_idx, width, height); + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); src = s->edge_emu_buffer + mx_idx + linesize * my_idx; } mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); @@ -1244,6 +1242,45 @@ void vp8_mc(VP8Context *s, int luma, mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); } +static av_always_inline +void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1, + uint8_t *src2, const VP56mv *mv, int x_off, int y_off, + int block_w, int block_h, int width, int height, int linesize, + vp8_mc_func mc_func[3][3]) +{ + if (AV_RN32A(mv)) { + int mx = mv->x&7, mx_idx = subpel_idx[0][mx]; + int my = mv->y&7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 3; + y_off += mv->y >> 3; + + // edge emulation + src1 += y_off * linesize + x_off; + src2 += y_off * linesize + x_off; + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx; + mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); + + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx; + mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); + } else { + mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); + mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); + } + } else { + mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); + mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); + } +} + static av_always_inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], AVFrame *ref_frame, int x_off, int y_off, @@ -1254,10 +1291,10 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], VP56mv uvmv = *mv; /* Y */ - vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, - ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->linesize, - s->put_pixels_tab[block_w == 8]); + vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off, + ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->linesize, + s->put_pixels_tab[block_w == 8]); /* U/V */ if (s->profile == 3) { @@ -1268,14 +1305,11 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], bx_off >>= 1; by_off >>= 1; width >>= 1; height >>= 1; block_w >>= 1; block_h >>= 1; - vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, - ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->uvlinesize, - s->put_pixels_tab[1 + (block_w == 4)]); - vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, - ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->uvlinesize, - s->put_pixels_tab[1 + (block_w == 4)]); + vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off, + dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1], + ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->uvlinesize, + s->put_pixels_tab[1 + (block_w == 4)]); } /* Fetch pixels for estimated mv 4 macroblocks ahead. @@ -1307,10 +1341,11 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, AVFrame *ref = s->framep[mb->ref_frame]; VP56mv *bmv = mb->bmv; - if (mb->mode < VP8_MVMODE_SPLIT) { + switch (mb->partitioning) { + case VP8_SPLITMVMODE_NONE: vp8_mc_part(s, dst, ref, x_off, y_off, 0, 0, 16, 16, width, height, &mb->mv); - } else switch (mb->partitioning) { + break; case VP8_SPLITMVMODE_4x4: { int x, y; VP56mv uvmv; @@ -1318,11 +1353,11 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, /* Y */ for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { - vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, - ref->data[0], &bmv[4*y + x], - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->linesize, - s->put_pixels_tab[2]); + vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4, + ref->data[0], &bmv[4*y + x], + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->linesize, + s->put_pixels_tab[2]); } } @@ -1344,16 +1379,12 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, uvmv.x &= ~7; uvmv.y &= ~7; } - vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, - ref->data[1], &uvmv, - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->uvlinesize, - s->put_pixels_tab[2]); - vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, - ref->data[2], &uvmv, - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->uvlinesize, - s->put_pixels_tab[2]); + vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4, + dst[2] + 4*y*s->uvlinesize + x*4, + ref->data[1], ref->data[2], &uvmv, + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->uvlinesize, + s->put_pixels_tab[2]); } } break; @@ -1390,17 +1421,17 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo if (mb->mode != MODE_I4x4) { uint8_t *y_dst = dst[0]; for (y = 0; y < 4; y++) { - uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]); + uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]); if (nnz4) { if (nnz4&~0x01010101) { for (x = 0; x < 4; x++) { - int nnz = s->non_zero_count_cache[y][x]; - if (nnz) { - if (nnz == 1) - s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); - else - s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); - } + if ((uint8_t)nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); + else if((uint8_t)nnz4 > 1) + s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); + nnz4 >>= 8; + if (!nnz4) + break; } } else { s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); @@ -1411,19 +1442,19 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo } for (ch = 0; ch < 2; ch++) { - uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); + uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]); if (nnz4) { uint8_t *ch_dst = dst[1+ch]; if (nnz4&~0x01010101) { for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; - if (nnz) { - if (nnz == 1) - s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); - else - s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); - } + if ((uint8_t)nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); + else if((uint8_t)nnz4 > 1) + s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); + nnz4 >>= 8; + if (!nnz4) + break; } ch_dst += 4*s->uvlinesize; } @@ -1447,24 +1478,16 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m if (s->lf_delta.enabled) { filter_level += s->lf_delta.ref[mb->ref_frame]; - - if (mb->ref_frame == VP56_FRAME_CURRENT) { - if (mb->mode == MODE_I4x4) - filter_level += s->lf_delta.mode[0]; - } else { - if (mb->mode == VP8_MVMODE_ZERO) - filter_level += s->lf_delta.mode[1]; - else if (mb->mode == VP8_MVMODE_SPLIT) - filter_level += s->lf_delta.mode[3]; - else - filter_level += s->lf_delta.mode[2]; - } + filter_level += s->lf_delta.mode[mb->mode]; } - filter_level = av_clip(filter_level, 0, 63); + +/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1) */ +#define POW2CLIP(x,max) (((x) & ~max) ? (-(x))>>31 & max : (x)); + filter_level = POW2CLIP(filter_level, 63); interior_limit = filter_level; if (s->filter.sharpness) { - interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; + interior_limit >>= (s->filter.sharpness + 3) >> 2; interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); } interior_limit = FFMAX(interior_limit, 1); @@ -1482,23 +1505,24 @@ static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Filter int inner_filter = f->inner_filter; int linesize = s->linesize; int uvlinesize = s->uvlinesize; + static const uint8_t hev_thresh_lut[2][64] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2 } + }; if (!filter_level) return; - mbedge_lim = 2*(filter_level+2) + inner_limit; - bedge_lim = 2* filter_level + inner_limit; - hev_thresh = filter_level >= 15; + bedge_lim = 2*filter_level + inner_limit; + mbedge_lim = bedge_lim + 4; - if (s->keyframe) { - if (filter_level >= 40) - hev_thresh = 2; - } else { - if (filter_level >= 40) - hev_thresh = 3; - else if (filter_level >= 20) - hev_thresh = 2; - } + hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; if (mb_x) { s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, @@ -1554,8 +1578,8 @@ static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Fi if (!filter_level) return; - mbedge_lim = 2*(filter_level+2) + inner_limit; - bedge_lim = 2* filter_level + inner_limit; + bedge_lim = 2*filter_level + inner_limit; + mbedge_lim = bedge_lim + 4; if (mb_x) s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);